1 """@namespace IMP.pmi.mmcif
2 @brief Support for the mmCIF file format.
4 IMP has basic support for writing out files in mmCIF format, for
5 deposition in [PDB-IHM](https://pdb-ihm.org/).
6 mmCIF files are currently generated by creating an
7 IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8 IMP.pmi.representation.Representation object, after which any
9 generated models and metadata are collected and output as mmCIF.
38 import ihm.representation
40 import ihm.cross_linkers
44 def _assign_id(obj, seen_objs, obj_by_id):
45 """Assign a unique ID to obj, and track all ids in obj_by_id."""
46 if obj
not in seen_objs:
47 if not hasattr(obj,
'id'):
49 obj.id = len(obj_by_id)
50 seen_objs[obj] = obj.id
52 obj.id = seen_objs[obj]
55 def _get_by_residue(p):
56 """Determine whether the given particle represents a specific residue
57 or a more coarse-grained object."""
61 class _ComponentMapper:
62 """Map a Particle to a component name"""
63 def __init__(self, prot):
66 self.name =
'cif-output'
67 self.o.dictionary_pdbs[self.name] = self.prot
68 self.o._init_dictchain(self.name, self.prot,
69 multichar_chain=
True, mmcif=
True)
71 def __getitem__(self, p):
72 protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
77 """Map a Particle to an asym_unit"""
78 def __init__(self, simo, prot):
80 self._cm = _ComponentMapper(prot)
81 self._seen_ranges = {}
83 def __getitem__(self, p):
84 protname = self._cm[p]
85 return self.simo.asym_units[protname]
87 def get_feature(self, ps):
88 """Get an ihm.restraint.Feature that covers the given particles"""
96 rng = asym(rind, rind)
100 rng = asym(rinds[0], rinds[-1])
102 raise ValueError(
"Unsupported particle type %s" % str(p))
104 if len(rngs) > 0
and rngs[-1].asym == asym \
105 and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106 rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
113 if hrngs
in self._seen_ranges:
114 return self._seen_ranges[hrngs]
116 feat = ihm.restraint.ResidueFeature(rngs)
117 self._seen_ranges[hrngs] = feat
122 def __init__(self, system):
124 self.modeller_used = self.phyre2_used =
False
125 self.pmi = ihm.Software(
126 name=
"IMP PMI module",
127 version=IMP.pmi.__version__,
128 classification=
"integrative model building",
129 description=
"integrative model building",
130 location=
'https://integrativemodeling.org')
131 self.imp = ihm.Software(
132 name=
"Integrative Modeling Platform (IMP)",
133 version=IMP.__version__,
134 classification=
"integrative model building",
135 description=
"integrative model building",
136 location=
'https://integrativemodeling.org')
139 if hasattr(self.imp,
'citation'):
140 javi =
'Vel\u00e1zquez-Muriel J'
141 self.imp.citation = ihm.Citation(
143 title=
'Putting the pieces together: integrative modeling '
144 'platform software for structure determination of '
145 'macromolecular assemblies',
146 journal=
'PLoS Biol', volume=10, page_range=
'e1001244',
148 authors=[
'Russel D',
'Lasker K',
'Webb B', javi,
'Tjioe E',
149 'Schneidman-Duhovny D',
'Peterson B',
'Sali A'],
150 doi=
'10.1371/journal.pbio.1001244')
151 self.pmi.citation = ihm.Citation(
153 title=
'Modeling Biological Complexes Using Integrative '
154 'Modeling Platform.',
155 journal=
'Methods Mol Biol', volume=2022, page_range=(353, 377),
157 authors=[
'Saltzberg D',
'Greenberg CH',
'Viswanath S',
158 'Chemmama I',
'Webb B',
'Pellarin R',
'Echeverria I',
160 doi=
'10.1007/978-1-4939-9608-7_15')
161 self.system.software.extend([self.pmi, self.imp])
163 def set_modeller_used(self, version, date):
164 if self.modeller_used:
166 self.modeller_used =
True
168 name=
'MODELLER', classification=
'comparative modeling',
169 description=
'Comparative modeling by satisfaction '
170 'of spatial restraints, build ' + date,
171 location=
'https://salilab.org/modeller/', version=version)
172 self.system.software.append(s)
173 if hasattr(s,
'citation'):
174 s.citation = ihm.Citation(
176 title=
'Comparative protein modelling by satisfaction of '
177 'spatial restraints.',
178 journal=
'J Mol Biol', volume=234, page_range=(779, 815),
179 year=1993, authors=[
'Sali A',
'Blundell TL'],
180 doi=
'10.1006/jmbi.1993.1626')
182 def set_phyre2_used(self):
185 self.phyre2_used =
True
187 name=
'Phyre2', classification=
'protein homology modeling',
188 description=
'Protein Homology/analogY Recognition Engine V 2.0',
189 version=
'2.0', location=
'http://www.sbg.bio.ic.ac.uk/~phyre2/')
190 if hasattr(s,
'citation'):
191 s.citation = ihm.Citation(
193 title=
'The Phyre2 web portal for protein modeling, '
194 'prediction and analysis.',
195 journal=
'Nat Protoc', volume=10, page_range=(845, 858),
196 authors=[
'Kelley LA',
'Mezulis S',
'Yates CM',
'Wass MN',
198 year=2015, doi=
'10.1038/nprot.2015.053')
199 self.system.software.append(s)
202 def _get_fragment_is_rigid(fragment):
203 """Determine whether a fragment is modeled rigidly"""
209 class _PDBFragment(ihm.representation.ResidueSegment):
210 """Record details about part of a PDB file used as input
212 def __init__(self, state, component, start, end, pdb_offset,
213 pdbname, chain, hier, asym_unit):
216 asym_unit=asym_unit.pmi_range(start, end),
217 rigid=
None, primitive=
'sphere')
218 self.component, self.start, self.end, self.offset, self.pdbname \
219 = component, start, end, pdb_offset, pdbname
220 self.state, self.chain, self.hier = state, chain, hier
225 rigid = property(
lambda self: _get_fragment_is_rigid(self),
226 lambda self, val:
None)
228 def combine(self, other):
232 class _BeadsFragment(ihm.representation.FeatureSegment):
233 """Record details about beads used to represent part of a component."""
236 def __init__(self, state, component, start, end, count, hier, asym_unit):
238 asym_unit=asym_unit(start, end), rigid=
None, primitive=
'sphere',
240 self.state, self.component, self.hier = state, component, hier
242 rigid = property(
lambda self: _get_fragment_is_rigid(self),
243 lambda self, val:
None)
245 def combine(self, other):
247 if (type(other) == type(self)
and
248 other.asym_unit.seq_id_range[0]
249 == self.asym_unit.seq_id_range[1] + 1):
250 self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
251 other.asym_unit.seq_id_range[1])
252 self.count += other.count
256 class _AllModelRepresentations:
257 def __init__(self, simo):
261 self.fragments = OrderedDict()
262 self._all_representations = {}
264 def copy_component(self, state, name, original, asym_unit):
265 """Copy all representation for `original` in `state` to `name`"""
268 newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
270 for rep
in self.fragments:
271 if original
in self.fragments[rep]:
272 if name
not in self.fragments[rep]:
273 self.fragments[rep][name] = OrderedDict()
274 self.fragments[rep][name][state] = [
275 copy_frag(f)
for f
in self.fragments[rep][original][state]]
278 first_state = list(self.fragments[rep][name].keys())[0]
279 if state
is first_state:
280 representation = self._all_representations[rep]
281 representation.extend(self.fragments[rep][name][state])
283 def add_fragment(self, state, representation, fragment):
284 """Add a model fragment."""
285 comp = fragment.component
286 id_rep = id(representation)
287 self._all_representations[id_rep] = representation
288 if id_rep
not in self.fragments:
289 self.fragments[id_rep] = OrderedDict()
290 if comp
not in self.fragments[id_rep]:
291 self.fragments[id_rep][comp] = OrderedDict()
292 if state
not in self.fragments[id_rep][comp]:
293 self.fragments[id_rep][comp][state] = []
294 fragments = self.fragments[id_rep][comp][state]
295 if len(fragments) == 0
or not fragments[-1].combine(fragment):
296 fragments.append(fragment)
299 first_state = list(self.fragments[id_rep][comp].keys())[0]
300 if state
is first_state:
301 representation.append(fragment)
305 """Track all datasets generated by PMI and add them to the ihm.System"""
306 def __init__(self, system):
309 self._datasets_by_state = {}
310 self._restraints_by_state = {}
312 def get_all_group(self, state):
313 """Get a DatasetGroup encompassing all datasets so far in this state"""
317 g = ihm.dataset.DatasetGroup(
318 self._datasets_by_state.get(state, [])
319 + [r.dataset
for r
in self._restraints_by_state.get(state, [])
323 def add(self, state, dataset):
324 """Add a new dataset."""
325 self._datasets.append(dataset)
326 if state
not in self._datasets_by_state:
327 self._datasets_by_state[state] = []
328 self._datasets_by_state[state].append(dataset)
331 self.system.orphan_datasets.append(dataset)
335 """Add the dataset for a restraint"""
336 if state
not in self._restraints_by_state:
337 self._restraints_by_state[state] = []
338 self._restraints_by_state[state].append(restraint)
341 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
342 """Restrain to a set of cross-links"""
345 _label_map = {
'wtDSS':
'DSS',
'scDSS':
'DSS',
'scEDC':
'EDC'}
346 _descriptor_map = {
'DSS': ihm.cross_linkers.dss,
347 'EDC': ihm.cross_linkers.edc}
349 def __init__(self, pmi_restraint):
350 self.pmi_restraint = pmi_restraint
353 linker = getattr(self.pmi_restraint,
'linker',
None)
354 label = self.pmi_restraint.label
357 dataset=self.pmi_restraint.dataset,
358 linker=linker
or self._get_chem_descriptor(label))
361 def _get_chem_descriptor(cls, label):
363 label = cls._label_map.get(label, label)
364 if label
not in cls._descriptor_map:
368 d = ihm.ChemDescriptor(label)
369 cls._descriptor_map[label] = d
370 return cls._descriptor_map[label]
372 def _set_psi_sigma(self, model):
375 if model.m != self.pmi_restraint.model:
377 for resolution
in self.pmi_restraint.sigma_dictionary:
378 statname =
'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
379 if model.stats
and statname
in model.stats:
380 sigma = float(model.stats[statname])
381 p = self.pmi_restraint.sigma_dictionary[resolution][0]
382 old_values.append((p, p.get_scale()))
384 for psiindex
in self.pmi_restraint.psi_dictionary:
385 statname =
'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
386 if model.stats
and statname
in model.stats:
387 psi = float(model.stats[statname])
388 p = self.pmi_restraint.psi_dictionary[psiindex][0]
389 old_values.append((p, p.get_scale()))
393 return list(reversed(old_values))
395 def add_fits_from_model_statfile(self, model):
397 old_values = self._set_psi_sigma(model)
401 for xl
in self.cross_links:
403 xl.fits[model] = ihm.restraint.CrossLinkFit(
404 psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
406 for p, scale
in old_values:
410 def __set_dataset(self, val):
411 self.pmi_restraint.dataset = val
412 dataset = property(
lambda self: self.pmi_restraint.dataset,
416 def get_asym_mapper_for_state(simo, state, asym_map):
417 asym = asym_map.get(state,
None)
419 asym = _AsymMapper(simo, state.prot)
420 asym_map[state] = asym
427 psi = property(
lambda self: self.psi_p.get_scale(),
428 lambda self, val:
None)
429 sigma1 = property(
lambda self: self.sigma1_p.get_scale(),
430 lambda self, val:
None)
431 sigma2 = property(
lambda self: self.sigma2_p.get_scale(),
432 lambda self, val:
None)
435 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
439 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
443 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
444 def __init__(self, state, pmi_restraint, image_number, resolution,
445 pixel_size, image_resolution, projection_number,
447 self.pmi_restraint, self.image_number = pmi_restraint, image_number
449 dataset=pmi_restraint.datasets[image_number],
450 assembly=state.modeled_assembly,
451 segment=
False, number_raw_micrographs=micrographs_number,
452 pixel_size_width=pixel_size, pixel_size_height=pixel_size,
453 image_resolution=image_resolution,
454 number_of_projections=projection_number)
457 def __get_dataset(self):
458 return self.pmi_restraint.datasets[self.image_number]
460 def __set_dataset(self, val):
461 self.pmi_restraint.datasets[self.image_number] = val
463 dataset = property(__get_dataset, __set_dataset)
465 def add_fits_from_model_statfile(self, model):
466 ccc = self._get_cross_correlation(model)
467 transform = self._get_transformation(model)
468 rot = transform.get_rotation()
469 rm = [[e
for e
in rot.get_rotation_matrix_row(i)]
for i
in range(3)]
470 self.fits[model] = ihm.restraint.EM2DRestraintFit(
471 cross_correlation_coefficient=ccc,
473 tr_vector=transform.get_translation())
475 def _get_transformation(self, model):
476 """Get the transformation that places the model on the image"""
477 stats = model.em2d_stats
or model.stats
478 prefix =
'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
479 self.image_number + 1)
480 r = [float(stats[prefix +
'_Rotation%d' % i])
for i
in range(4)]
481 t = [float(stats[prefix +
'_Translation%d' % i])
485 inv = model.transform.get_inverse()
489 def _get_cross_correlation(self, model):
490 """Get the cross correlation coefficient between the model projection
492 stats = model.em2d_stats
or model.stats
493 return float(stats[
'ElectronMicroscopy2D_%s_Image%d_CCC'
494 % (self.pmi_restraint.label,
495 self.image_number + 1)])
498 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
500 def __init__(self, simo, state, pmi_restraint, target_ps, densities):
501 self.pmi_restraint = pmi_restraint
503 dataset=pmi_restraint.dataset,
504 assembly=self._get_assembly(densities, simo, state),
505 fitting_method=
'Gaussian mixture models',
506 number_of_gaussians=len(target_ps))
509 def __set_dataset(self, val):
510 self.pmi_restraint.dataset = val
511 dataset = property(
lambda self: self.pmi_restraint.dataset,
514 def _get_assembly(self, densities, simo, state):
515 """Get the Assembly that this restraint acts on"""
516 cm = _ComponentMapper(state.prot)
519 components[cm[d]] =
None
520 a = simo._get_subassembly(
521 components, name=
"EM subassembly",
522 description=
"All components that fit the EM map")
525 def add_fits_from_model_statfile(self, model):
526 ccc = self._get_cross_correlation(model)
527 self.fits[model] = ihm.restraint.EM3DRestraintFit(
528 cross_correlation_coefficient=ccc)
530 def _get_cross_correlation(self, model):
531 """Get the cross correlation coefficient between the model
533 if model.stats
is not None:
534 return float(model.stats[
'GaussianEMRestraint_%s_CCC'
535 % self.pmi_restraint.label])
538 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
540 def __init__(self, simo, state, pmi_restraint, geometric_object,
541 feature, distance, sigma):
542 self.pmi_restraint = pmi_restraint
544 dataset=pmi_restraint.dataset,
545 geometric_object=geometric_object, feature=feature,
546 distance=distance, harmonic_force_constant=1. / sigma,
550 def __set_dataset(self, val):
551 self.pmi_restraint.dataset = val
552 dataset = property(
lambda self: self.pmi_restraint.dataset,
556 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
557 def __init__(self, state, rex):
558 if rex.monte_carlo_sample_objects
is not None:
559 method =
'Replica exchange monte carlo'
561 method =
'Replica exchange molecular dynamics'
562 self.monte_carlo_temperature = rex.vars[
'monte_carlo_temperature']
563 self.replica_exchange_minimum_temperature = \
564 rex.vars[
'replica_exchange_minimum_temperature']
565 self.replica_exchange_maximum_temperature = \
566 rex.vars[
'replica_exchange_maximum_temperature']
568 assembly=state.modeled_assembly,
570 method=method, name=
'Sampling',
571 num_models_begin=
None,
572 num_models_end=rex.vars[
"number_of_frames"],
573 multi_scale=
True, multi_state=
False, ordered=
False, ensemble=
True)
576 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
577 """Write IMP-specific information about replica exchange to mmCIF.
578 Note that IDs will have already been assigned by python-ihm's
579 standard modeling protocol dumper."""
580 def dump(self, system, writer):
581 with writer.loop(
"_imp_replica_exchange_protocol",
582 [
"protocol_id",
"step_id",
"monte_carlo_temperature",
583 "replica_exchange_minimum_temperature",
584 "replica_exchange_maximum_temperature"])
as lp:
585 for p
in system._all_protocols():
587 if isinstance(s, _ReplicaExchangeProtocolStep):
588 self._dump_step(p, s, lp)
590 def _dump_step(self, p, s, lp):
591 mintemp = s.replica_exchange_minimum_temperature
592 maxtemp = s.replica_exchange_maximum_temperature
593 lp.write(protocol_id=p._id, step_id=s._id,
594 monte_carlo_temperature=s.monte_carlo_temperature,
595 replica_exchange_minimum_temperature=mintemp,
596 replica_exchange_maximum_temperature=maxtemp)
599 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
600 category =
'_imp_replica_exchange_protocol'
602 """Read IMP-specific information about replica exchange from mmCIF."""
603 def __call__(self, protocol_id, step_id, monte_carlo_temperature,
604 replica_exchange_minimum_temperature,
605 replica_exchange_maximum_temperature):
606 p = self.sysr.protocols.get_by_id(protocol_id)
608 s = p.steps[int(step_id)-1]
610 s.__class__ = _ReplicaExchangeProtocolStep
611 s.monte_carlo_temperature = \
612 self.get_float(monte_carlo_temperature)
613 s.replica_exchange_minimum_temperature = \
614 self.get_float(replica_exchange_minimum_temperature)
615 s.replica_exchange_maximum_temperature = \
616 self.get_float(replica_exchange_maximum_temperature)
619 class _SimpleProtocolStep(ihm.protocol.Step):
620 def __init__(self, state, num_models_end, method):
622 assembly=state.modeled_assembly,
624 method=method, name=
'Sampling',
625 num_models_begin=
None,
626 num_models_end=num_models_end,
627 multi_scale=
True, multi_state=
False, ordered=
False,
632 """Represent a single chain in a Model"""
633 def __init__(self, pmi_chain_id, asym_unit):
634 self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
638 def add(self, xyz, atom_type, residue_type, residue_index,
639 all_indexes, radius):
640 if atom_type
is None:
641 self.spheres.append((xyz, residue_type, residue_index,
642 all_indexes, radius))
644 self.atoms.append((xyz, atom_type, residue_type, residue_index,
645 all_indexes, radius))
646 orig_comp = property(
lambda self: self.comp)
649 class _TransformedChain:
650 """Represent a chain that is a transformed version of another"""
651 def __init__(self, orig_chain, asym_unit, transform):
652 self.orig_chain, self.asym_unit = orig_chain, asym_unit
653 self.transform = transform
655 def __get_spheres(self):
656 for (xyz, residue_type, residue_index, all_indexes,
657 radius)
in self.orig_chain.spheres:
658 yield (self.transform * xyz, residue_type, residue_index,
660 spheres = property(__get_spheres)
662 def __get_atoms(self):
663 for (xyz, atom_type, residue_type, residue_index, all_indexes,
664 radius)
in self.orig_chain.atoms:
665 yield (self.transform * xyz, atom_type, residue_type,
666 residue_index, all_indexes, radius)
667 atoms = property(__get_atoms)
669 entity = property(
lambda self: self.orig_chain.entity)
670 orig_comp = property(
lambda self: self.orig_chain.comp)
674 def __init__(self, component, simo):
675 self._seqranges = simo._exclude_coords.get(component, [])
677 def is_excluded(self, indexes):
678 """Return True iff the given sequence range is excluded."""
679 for seqrange
in self._seqranges:
680 if indexes[0] >= seqrange[0]
and indexes[-1] <= seqrange[1]:
684 class _Model(ihm.model.Model):
685 def __init__(self, prot, simo, protocol, assembly, representation):
686 super().__init__(assembly=assembly, protocol=protocol,
687 representation=representation)
688 self.simo = weakref.proxy(simo)
694 self.em2d_stats =
None
697 self._is_restrained =
True
700 self.m = prot.get_model()
701 o.dictionary_pdbs[name] = prot
702 o._init_dictchain(name, prot, multichar_chain=
True)
703 (particle_infos_for_pdb,
704 self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
706 self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
709 def all_chains(self, simo):
710 """Yield all chains, including transformed ones"""
712 for c
in self.chains:
714 chain_for_comp[c.comp] = c
715 for tc
in simo._transformed_components:
716 orig_chain = chain_for_comp.get(tc.original,
None)
718 asym = simo.asym_units[tc.name]
719 c = _TransformedChain(orig_chain, asym, tc.transform)
723 def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
724 entity_for_chain = {}
727 for protname, chain_id
in o.dictchain[name].items():
728 if protname
in simo.entities:
729 entity_for_chain[chain_id] = simo.entities[protname]
732 pn = protname.split(
'.')[0]
733 entity_for_chain[chain_id] = simo.entities[pn]
734 comp_for_chain[chain_id] = protname
738 correct_asym[chain_id] = simo.asym_units[protname]
745 for (xyz, atom_type, residue_type, chain_id, residue_index,
746 all_indexes, radius)
in particle_infos_for_pdb:
747 if chain
is None or chain.pmi_chain_id != chain_id:
748 chain = _Chain(chain_id, correct_asym[chain_id])
749 chain.entity = entity_for_chain[chain_id]
750 chain.comp = comp_for_chain[chain_id]
751 self.chains.append(chain)
752 excluder = _Excluder(chain.comp, simo)
753 if not excluder.is_excluded(all_indexes
if all_indexes
754 else [residue_index]):
755 chain.add(xyz, atom_type, residue_type, residue_index,
758 def parse_rmsf_file(self, fname, component):
759 self.rmsf[component] = rmsf = {}
760 with open(str(fname))
as fh:
762 resnum, blocknum, val = line.split()
763 rmsf[int(resnum)] = (int(blocknum), float(val))
765 def get_rmsf(self, component, indexes):
766 """Get the RMSF value for the given residue indexes."""
769 rmsf = self.rmsf[component]
770 blocknums = dict.fromkeys(rmsf[ind][0]
for ind
in indexes)
771 if len(blocknums) != 1:
772 raise ValueError(
"Residue indexes %s aren't all in the same block"
774 return rmsf[indexes[0]][1]
777 for chain
in self.all_chains(self.simo):
778 pmi_offset = chain.asym_unit.entity.pmi_offset
779 for atom
in chain.atoms:
780 (xyz, atom_type, residue_type, residue_index,
781 all_indexes, radius) = atom
782 pt = self.transform * xyz
783 yield ihm.model.Atom(
784 asym_unit=chain.asym_unit,
785 seq_id=residue_index - pmi_offset,
786 atom_id=atom_type.get_string(),
788 x=pt[0], y=pt[1], z=pt[2])
790 def get_spheres(self):
791 for chain
in self.all_chains(self.simo):
792 pmi_offset = chain.asym_unit.entity.pmi_offset
793 for sphere
in chain.spheres:
794 (xyz, residue_type, residue_index,
795 all_indexes, radius) = sphere
796 if all_indexes
is None:
797 all_indexes = (residue_index,)
798 pt = self.transform * xyz
799 yield ihm.model.Sphere(
800 asym_unit=chain.asym_unit,
801 seq_id_range=(all_indexes[0] - pmi_offset,
802 all_indexes[-1] - pmi_offset),
803 x=pt[0], y=pt[1], z=pt[2], radius=radius,
804 rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
808 def __init__(self, simo):
809 self.simo = weakref.proxy(simo)
811 self.protocols = OrderedDict()
813 def add_protocol(self, state):
814 """Add a new Protocol"""
815 if state
not in self.protocols:
816 self.protocols[state] = []
817 p = ihm.protocol.Protocol()
818 self.simo.system.orphan_protocols.append(p)
819 self.protocols[state].append(p)
821 def add_step(self, step, state):
822 """Add a ProtocolStep to the last Protocol of the given State"""
823 if state
not in self.protocols:
824 self.add_protocol(state)
825 protocol = self.get_last_protocol(state)
826 if len(protocol.steps) == 0:
827 step.num_models_begin = 0
829 step.num_models_begin = protocol.steps[-1].num_models_end
830 protocol.steps.append(step)
831 step.id = len(protocol.steps)
833 step.dataset_group = self.simo.all_datasets.get_all_group(state)
835 def add_postproc(self, step, state):
836 """Add a postprocessing step to the last protocol"""
837 protocol = self.get_last_protocol(state)
838 if len(protocol.analyses) == 0:
839 protocol.analyses.append(ihm.analysis.Analysis())
840 protocol.analyses[-1].steps.append(step)
842 def get_last_protocol(self, state):
843 """Return the most recently-added _Protocol"""
844 return self.protocols[state][-1]
847 class _AllStartingModels:
848 def __init__(self, simo):
852 self.models = OrderedDict()
855 def add_pdb_fragment(self, fragment):
856 """Add a starting model PDB fragment."""
857 comp = fragment.component
858 state = fragment.state
859 if comp
not in self.models:
860 self.models[comp] = OrderedDict()
861 if state
not in self.models[comp]:
862 self.models[comp][state] = []
863 models = self.models[comp][state]
864 if len(models) == 0 \
865 or models[-1].fragments[0].pdbname != fragment.pdbname:
866 model = self._add_model(fragment)
870 models[-1].fragments.append(weakref.proxy(fragment))
874 pmi_offset = models[-1].asym_unit.entity.pmi_offset
875 sid_begin = min(fragment.start - pmi_offset,
876 models[-1].asym_unit.seq_id_range[0])
877 sid_end = max(fragment.end - pmi_offset,
878 models[-1].asym_unit.seq_id_range[1])
879 models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
880 fragment.starting_model = models[-1]
882 def _add_model(self, f):
883 parser = ihm.metadata.PDBParser()
884 r = parser.parse_file(f.pdbname)
886 self.simo._add_dataset(r[
'dataset'])
888 templates = r[
'templates'].get(f.chain, [])
891 self.simo.system.locations.append(t.alignment_file)
893 self.simo._add_dataset(t.dataset)
894 source = r[
'entity_source'].get(f.chain)
896 f.asym_unit.entity.source = source
897 pmi_offset = f.asym_unit.entity.pmi_offset
899 asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
900 dataset=r[
'dataset'], asym_id=f.chain,
901 templates=templates, offset=f.offset + pmi_offset,
902 metadata=r[
'metadata'],
903 software=r[
'software'][0]
if r[
'software']
else None,
904 script_file=r[
'script'])
905 m.fragments = [weakref.proxy(f)]
909 class _StartingModel(ihm.startmodel.StartingModel):
910 def get_seq_dif(self):
914 pmi_offset = self.asym_unit.entity.pmi_offset
915 mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
917 for f
in self.fragments:
920 residue_indexes=list(range(f.start - f.offset,
921 f.end - f.offset + 1)))
922 for a
in mh.get_ihm_atoms(sel.get_selected_particles(),
923 f.offset - pmi_offset):
925 self._seq_dif = mh._seq_dif
928 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
929 """Post processing using AnalysisReplicaExchange0 macro"""
931 def __init__(self, rex, num_models_begin):
934 for fname
in self.get_all_stat_files():
935 with open(str(fname))
as fh:
936 num_models_end += len(fh.readlines())
938 feature=
'RMSD', num_models_begin=num_models_begin,
939 num_models_end=num_models_end)
941 def get_stat_file(self, cluster_num):
942 return self.rex._outputdir / (
"cluster.%d" % cluster_num) /
'stat.out'
944 def get_all_stat_files(self):
945 for i
in range(self.rex._number_of_clusters):
946 yield self.get_stat_file(i)
949 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
950 """Ensemble generated using AnalysisReplicaExchange0 macro"""
952 num_models_deposited =
None
954 def __init__(self, pp, cluster_num, model_group, num_deposit):
955 with open(str(pp.get_stat_file(cluster_num)))
as fh:
956 num_models = len(fh.readlines())
958 num_models=num_models,
959 model_group=model_group, post_process=pp,
960 clustering_feature=pp.feature,
961 name=model_group.name)
962 self.cluster_num = cluster_num
963 self.num_models_deposited = num_deposit
965 def get_rmsf_file(self, component):
966 return (self.post_process.rex._outputdir
967 / (
'cluster.%d' % self.cluster_num)
968 / (
'rmsf.%s.dat' % component))
970 def load_rmsf(self, model, component):
971 fname = self.get_rmsf_file(component)
973 model.parse_rmsf_file(fname, component)
975 def get_localization_density_file(self, fname):
976 return (self.post_process.rex._outputdir
977 / (
'cluster.%d' % self.cluster_num)
978 / (
'%s.mrc' % fname))
980 def load_localization_density(self, state, fname, select_tuple,
982 fullpath = self.get_localization_density_file(fname)
983 if fullpath.exists():
984 details =
"Localization density for %s %s" \
985 % (fname, self.model_group.name)
986 local_file = ihm.location.OutputFileLocation(str(fullpath),
988 for s
in select_tuple:
989 if isinstance(s, tuple)
and len(s) == 3:
990 asym = asym_units[s[2]].pmi_range(s[0], s[1])
993 den = ihm.model.LocalizationDensity(file=local_file,
995 self.densities.append(den)
997 def load_all_models(self, simo, state):
998 stat_fname = self.post_process.get_stat_file(self.cluster_num)
1000 with open(str(stat_fname))
as fh:
1001 stats = ast.literal_eval(fh.readline())
1003 rmf_file = stat_fname.parent / (
"%d.rmf3" % model_num)
1005 if rmf_file.exists():
1006 rh = RMF.open_rmf_file_read_only(str(rmf_file))
1007 system = state._pmi_object.system
1013 if model_num >= self.num_models_deposited:
1017 def _get_precision(self):
1018 precfile = (self.post_process.rex._outputdir /
1019 (
"precision.%d.%d.out" % (self.cluster_num,
1021 if not precfile.exists():
1025 r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
1027 with open(str(precfile))
as fh:
1031 return float(m.group(1))
1033 precision = property(
lambda self: self._get_precision(),
1034 lambda self, val:
None)
1037 class _SimpleEnsemble(ihm.model.Ensemble):
1038 """Simple manually-created ensemble"""
1040 num_models_deposited =
None
1042 def __init__(self, pp, model_group, num_models, drmsd,
1043 num_models_deposited, ensemble_file):
1045 model_group=model_group, post_process=pp, num_models=num_models,
1046 file=ensemble_file, precision=drmsd, name=model_group.name,
1047 clustering_feature=
'dRMSD')
1048 self.num_models_deposited = num_models_deposited
1050 def load_localization_density(self, state, component, asym, local_file):
1051 den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1052 self.densities.append(den)
1055 class _CustomDNAAlphabet:
1056 """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1057 as in python-ihm)"""
1058 _comps = dict([cc.code_canonical, cc]
1059 for cc
in ihm.DNAAlphabet._comps.values())
1062 class _EntityMapper(dict):
1063 """Handle mapping from IMP components (without copy number) to CIF
1064 entities. Multiple components may map to the same entity if they
1066 def __init__(self, system):
1068 self._sequence_dict = {}
1070 self.system = system
1072 def _get_alphabet(self, alphabet):
1073 """Map a PMI alphabet to an IHM alphabet"""
1077 alphabet_map = {
None: ihm.LPeptideAlphabet,
1078 IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1079 IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1080 IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1081 if alphabet
in alphabet_map:
1082 return alphabet_map[alphabet]
1084 raise TypeError(
"Don't know how to handle %s" % alphabet)
1086 def add(self, component_name, sequence, offset, alphabet, uniprot):
1087 def entity_seq(sequence):
1090 return [
'UNK' if s ==
'X' else s
for s
in sequence]
1093 if sequence
not in self._sequence_dict:
1096 d = component_name.split(
"@")[0].split(
".")[0]
1097 entity = Entity(entity_seq(sequence), description=d,
1099 alphabet=self._get_alphabet(alphabet),
1101 self.system.entities.append(entity)
1102 self._sequence_dict[sequence] = entity
1103 self[component_name] = self._sequence_dict[sequence]
1106 class _TransformedComponent:
1107 def __init__(self, name, original, transform):
1108 self.name, self.original, self.transform = name, original, transform
1112 """Class with similar interface to weakref.ref, but keeps a strong ref"""
1113 def __init__(self, ref):
1120 class _State(ihm.model.State):
1121 """Representation of a single state in the system."""
1123 def __init__(self, pmi_object, po):
1128 self._pmi_object = weakref.proxy(pmi_object)
1129 if hasattr(pmi_object,
'state'):
1132 self._pmi_state = _SimpleRef(pmi_object.state)
1134 self._pmi_state = weakref.ref(pmi_object)
1136 old_name = self.name
1137 super().__init__(experiment_type=
'Fraction of bulk')
1138 self.name = old_name
1142 self.modeled_assembly = ihm.Assembly(
1143 name=
"Modeled assembly",
1144 description=self.get_postfixed_name(
1145 "All components modeled by IMP"))
1146 po.system.orphan_assemblies.append(self.modeled_assembly)
1148 self.all_modeled_components = []
1151 return hash(self._pmi_state())
1153 def __eq__(self, other):
1154 return self._pmi_state() == other._pmi_state()
1156 def add_model_group(self, group):
1160 def get_prefixed_name(self, name):
1161 """Prefix the given name with the state name, if available."""
1163 return self.short_name +
' ' + name
1167 return name[0].upper() + name[1:]
if name
else ''
1169 def get_postfixed_name(self, name):
1170 """Postfix the given name with the state name, if available."""
1172 return "%s in state %s" % (name, self.short_name)
1176 short_name = property(
lambda self: self._pmi_state().short_name)
1177 long_name = property(
lambda self: self._pmi_state().long_name)
1179 def __get_name(self):
1180 return self._pmi_state().long_name
1182 def __set_name(self, val):
1183 self._pmi_state().long_name = val
1185 name = property(__get_name, __set_name)
1189 """A single entity in the system. This contains information (such as
1190 database identifiers) specific to a particular sequence rather than
1191 a copy (for example, when modeling a homodimer, two AsymUnits will
1192 point to the same Entity).
1194 This functions identically to the base ihm.Entity class, but it
1195 allows identifying residues by either the PMI numbering scheme
1196 (which is always contiguous starting at 1, covering the entire
1197 sequence in the FASTA files, or the IHM scheme (seq_id, which also
1198 starts at 1, but which only covers the modeled subset of the full
1199 sequence, with non-modeled N-terminal or C-terminal residues
1200 removed). The actual offset (which is the integer to be added to the
1201 IHM numbering to get PMI numbering, or equivalently the number of
1202 not-represented N-terminal residues in the PMI sequence) is
1203 available in the `pmi_offset` member.
1205 If a UniProt accession was provided for the sequence (either when
1206 State.create_molecule() was called, or in the FASTA alignment file
1207 header) then that is available in the `uniprot` member, and can be
1208 added to the IHM system with the add_uniprot_reference method.
1210 def __init__(self, sequence, pmi_offset, uniprot, *args, **keys):
1213 self.pmi_offset = pmi_offset
1214 self.uniprot = uniprot
1215 super().__init__(sequence, *args, **keys)
1218 """Return a single IHM residue indexed using PMI numbering"""
1219 return self.residue(res_id - self.pmi_offset)
1222 """Return a range of IHM residues indexed using PMI numbering"""
1223 off = self.pmi_offset
1224 return self(res_id_begin - off, res_id_end - off)
1227 """Add UniProt accession (if available) to the IHM system.
1228 If a UniProt accession was provided for the sequence (either when
1229 State.create_molecule() was called, or in the FASTA alignment file
1230 header), then look this up at the UniProt web site (requires
1231 network access) to get full information, and add it to the IHM
1232 system. The resulting reference object is returned. If the IMP
1233 and UniProt sequences are not identical, then this object may
1234 need to be modified by specifying an alignment and/or
1235 single-point mutations.
1238 print(
'Adding UniProt accession %s reference for entity %s'
1239 % (self.uniprot, self.description))
1240 ref = ihm.reference.UniProtSequence.from_accession(self.uniprot)
1241 self.references.append(ref)
1246 """A single asymmetric unit in the system. This roughly corresponds to
1247 a single PMI subunit (sequence, copy, or clone).
1249 This functions identically to the base ihm.AsymUnit class, but it
1250 allows identifying residues by either the PMI numbering scheme
1251 (which is always contiguous starting at 1, covering the entire
1252 sequence in the FASTA files, or the IHM scheme (seq_id, which also
1253 starts at 1, but which only covers the modeled subset of the full
1254 sequence, with non-modeled N-terminal or C-terminal residues
1257 The `entity` member of this class points to an Entity object, which
1258 contains information (such as database identifiers) specific to
1259 a particular sequence rather than a copy (for example, when modeling
1260 a homodimer, two AsymUnits will point to the same Entity).
1263 def __init__(self, entity, *args, **keys):
1265 entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1268 """Return a single IHM residue indexed using PMI numbering"""
1269 return self.residue(res_id - self.entity.pmi_offset)
1272 """Return a range of IHM residues indexed using PMI numbering"""
1273 off = self.entity.pmi_offset
1274 return self(res_id_begin - off, res_id_end - off)
1278 """Class to encode a modeling protocol as mmCIF.
1280 IMP has basic support for writing out files in mmCIF format, for
1281 deposition in [PDB-IHM](https://pdb-ihm.org/).
1282 After creating an instance of this class, attach it to an
1283 IMP.pmi.topology.System object. After this, any
1284 generated models and metadata are automatically collected in the
1285 `system` attribute, which is an
1286 [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1287 Once the protocol is complete, call finalize() to make sure `system`
1288 contains everything, then use the
1289 [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1290 to write out files in mmCIF or BinaryCIF format.
1292 Each PMI subunit will be mapped to an IHM AsymUnit class which contains the
1293 subset of the sequence that was represented. Use the `asym_units` dict to
1294 get this object given a PMI subunit name. Each unique sequence will be
1295 mapped to an IHM Entity class (for example when modeling a homodimer
1296 there will be two AsymUnits which both point to the same Entity). Use
1297 the `entities` dict to get this object from a PMI subunit name.
1299 See also Entity, AsymUnit, get_handlers(), get_dumpers().
1303 self.system = ihm.System()
1304 self._state_group = ihm.model.StateGroup()
1305 self.system.state_groups.append(self._state_group)
1307 self._state_ensemble_offset = 0
1308 self._main_script = os.path.abspath(sys.argv[0])
1311 loc = ihm.location.WorkflowFileLocation(
1312 path=self._main_script,
1313 details=
"The main integrative modeling script")
1314 self.system.locations.append(loc)
1317 self.__asym_states = {}
1318 self._working_directory = os.getcwd()
1320 "Default representation")
1321 self.entities = _EntityMapper(self.system)
1323 self.asym_units = {}
1324 self._all_components = {}
1325 self.all_modeled_components = []
1326 self._transformed_components = []
1327 self.sequence_dict = {}
1330 self._xy_plane = ihm.geometry.XYPlane()
1331 self._xz_plane = ihm.geometry.XZPlane()
1332 self._z_axis = ihm.geometry.ZAxis()
1333 self._center_origin = ihm.geometry.Center(0, 0, 0)
1334 self._identity_transform = ihm.geometry.Transformation.identity()
1337 self._exclude_coords = {}
1339 self.all_representations = _AllModelRepresentations(self)
1340 self.all_protocols = _AllProtocols(self)
1341 self.all_datasets = _AllDatasets(self.system)
1342 self.all_starting_models = _AllStartingModels(self)
1344 self.all_software = _AllSoftware(self.system)
1347 """Create a new Representation and return it. This can be
1348 passed to add_model(), add_bead_element() or add_pdb_element()."""
1349 r = ihm.representation.Representation(name=name)
1350 self.system.orphan_representations.append(r)
1354 """Don't record coordinates for the given domain.
1355 Coordinates for the given domain (specified by a component name
1356 and a 2-element tuple giving the start and end residue numbers)
1357 will be excluded from the mmCIF file. This can be used to exclude
1358 parts of the structure that weren't well resolved in modeling.
1359 Any bead or residue that lies wholly within this range will be
1360 excluded. Multiple ranges for a given component can be excluded
1361 by calling this method multiple times."""
1362 if component
not in self._exclude_coords:
1363 self._exclude_coords[component] = []
1364 self._exclude_coords[component].append(seqrange)
1366 def _is_excluded(self, component, start, end):
1367 """Return True iff this chunk of sequence should be excluded"""
1368 for seqrange
in self._exclude_coords.get(component, ()):
1369 if start >= seqrange[0]
and end <= seqrange[1]:
1372 def _add_state(self, state):
1373 """Create a new state and return a pointer to it."""
1374 self._state_ensemble_offset = len(self.system.ensembles)
1375 s = _State(state, self)
1376 self._state_group.append(s)
1377 self._last_state = s
1380 def _get_chain_for_component(self, name, output):
1381 """Get the chain ID for a component, if any."""
1383 if name
in self.asym_units:
1384 return self.asym_units[name]._id
1389 def _get_assembly_comps(self, assembly):
1390 """Get the names of the components in the given assembly"""
1394 comps[ca.details] =
None
1398 """Make a new component that's a transformed copy of another.
1399 All representation for the existing component is copied to the
1401 assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1402 if name
in assembly_comps:
1403 raise ValueError(
"Component %s already exists" % name)
1404 elif original
not in assembly_comps:
1405 raise ValueError(
"Original component %s does not exist" % original)
1406 self.create_component(state, name,
True)
1407 self.add_component_sequence(state, name, self.sequence_dict[original])
1408 self._transformed_components.append(_TransformedComponent(
1409 name, original, transform))
1410 self.all_representations.copy_component(state, name, original,
1411 self.asym_units[name])
1413 def create_component(self, state, name, modeled, asym_name=None):
1414 if asym_name
is None:
1416 new_comp = name
not in self._all_components
1417 self._all_components[name] =
None
1419 state.all_modeled_components.append(name)
1420 if asym_name
not in self.asym_units:
1422 self.asym_units[asym_name] =
None
1424 self.all_modeled_components.append(name)
1426 def add_component_sequence(self, state, name, seq, asym_name=None,
1427 alphabet=
None, uniprot=
None):
1428 if asym_name
is None:
1431 if name
in self.sequence_dict:
1432 if self.sequence_dict[name] != seq:
1433 raise ValueError(
"Sequence mismatch for component %s" % name)
1435 self.sequence_dict[name] = seq
1439 self.entities.add(name, seq, 0, alphabet, uniprot)
1440 if asym_name
in self.asym_units:
1441 if self.asym_units[asym_name]
is None:
1443 entity = self.entities[name]
1444 asym =
AsymUnit(entity, details=asym_name)
1445 self.system.asym_units.append(asym)
1446 self.asym_units[asym_name] = asym
1447 state.modeled_assembly.append(self.asym_units[asym_name])
1450 """Called immediately after the PMI system is built"""
1451 for entity
in self.system.entities:
1452 _trim_unrep_termini(entity, self.system.asym_units,
1453 self.system.orphan_representations)
1455 def _add_restraint_model_fits(self):
1456 """Add fits to restraints for all known models"""
1457 for group, m
in self.system._all_models():
1458 if m._is_restrained:
1459 for r
in self.system.restraints:
1460 if hasattr(r,
'add_fits_from_model_statfile'):
1461 r.add_fits_from_model_statfile(m)
1464 """Do any final processing on the class hierarchy.
1465 After calling this method, the `system` member (an instance
1466 of `ihm.System`) completely reproduces the PMI modeling, and
1467 can be written out to an mmCIF file with `ihm.dumper.write`,
1468 and/or modified using the ihm API."""
1469 self._add_restraint_model_fits()
1471 def add_pdb_element(self, state, name, start, end, offset, pdbname,
1472 chain, hier, representation=
None):
1473 if self._is_excluded(name, start, end):
1475 if representation
is None:
1476 representation = self.default_representation
1477 asym = self.asym_units[name]
1478 p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1480 self.all_representations.add_fragment(state, representation, p)
1481 self.all_starting_models.add_pdb_fragment(p)
1483 def add_bead_element(self, state, name, start, end, num, hier,
1484 representation=
None):
1485 if self._is_excluded(name, start, end):
1487 if representation
is None:
1488 representation = self.default_representation
1489 asym = self.asym_units[name]
1490 pmi_offset = asym.entity.pmi_offset
1491 b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1493 self.all_representations.add_fragment(state, representation, b)
1495 def get_cross_link_group(self, pmi_restraint):
1496 r = _CrossLinkRestraint(pmi_restraint)
1497 self.system.restraints.append(r)
1498 self._add_restraint_dataset(r)
1501 def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1502 if c1
not in self._all_components
or c2
not in self._all_components:
1508 e1 = self.entities[c1]
1509 e2 = self.entities[c2]
1510 xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1511 residue2=e2.pmi_residue(r2))
1512 rsr.experimental_cross_links.append([xl])
1515 def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1518 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1519 d = ihm.restraint.UpperBoundDistanceRestraint(length)
1521 if _get_by_residue(p1)
and _get_by_residue(p2):
1522 cls = _ResidueCrossLink
1524 cls = _FeatureCrossLink
1525 xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1528 xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1529 rsr.cross_links.append(xl)
1531 def add_replica_exchange(self, state, rex):
1536 step = _ReplicaExchangeProtocolStep(state, rex)
1537 step.software = self.all_software.pmi
1538 self.all_protocols.add_step(step, state)
1540 def _add_simple_dynamics(self, num_models_end, method):
1542 state = self._last_state
1543 self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1546 def _add_protocol(self):
1548 state = self._last_state
1549 self.all_protocols.add_protocol(state)
1551 def _add_dataset(self, dataset):
1552 return self.all_datasets.add(self._last_state, dataset)
1554 def _add_restraint_dataset(self, restraint):
1555 return self.all_datasets.add_restraint(self._last_state, restraint)
1557 def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1559 state = self._last_state
1560 pp = ihm.analysis.ClusterStep(
'RMSD', num_models_begin, num_models_end)
1561 self.all_protocols.add_postproc(pp, state)
1564 def _add_no_postprocessing(self, num_models):
1566 state = self._last_state
1567 pp = ihm.analysis.EmptyStep()
1568 pp.num_models_begin = pp.num_models_end = num_models
1569 self.all_protocols.add_postproc(pp, state)
1572 def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1573 num_models_deposited, localization_densities,
1575 """Add an ensemble generated by ad hoc methods (not using PMI).
1576 This is currently only used by the Nup84 system."""
1578 state = self._last_state
1579 group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1580 state.add_model_group(group)
1582 self.system.locations.append(ensemble_file)
1583 e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1585 self.system.ensembles.append(e)
1586 for c
in state.all_modeled_components:
1587 den = localization_densities.get(c,
None)
1589 e.load_localization_density(state, c, self.asym_units[c], den)
1593 """Point a previously-created ensemble to an 'all-models' file.
1594 This could be a trajectory such as DCD, an RMF, or a multimodel
1596 self.system.locations.append(location)
1598 ind = i + self._state_ensemble_offset
1599 self.system.ensembles[ind].file = location
1601 def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1607 protocol = self.all_protocols.get_last_protocol(state)
1608 num_models = protocol.steps[-1].num_models_end
1609 pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1610 pp.software = self.all_software.pmi
1611 self.all_protocols.add_postproc(pp, state)
1612 for i
in range(rex._number_of_clusters):
1613 group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1614 'cluster %d' % (i + 1)))
1615 state.add_model_group(group)
1617 e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1618 self.system.ensembles.append(e)
1620 for fname, stuple
in sorted(density_custom_ranges.items()):
1621 e.load_localization_density(state, fname, stuple,
1623 for stats
in e.load_all_models(self, state):
1624 m = self.add_model(group)
1627 m.name =
'Best scoring model'
1630 for c
in state.all_modeled_components:
1633 def _get_subassembly(self, comps, name, description):
1634 """Get an Assembly consisting of the given components.
1635 `compdict` is a dictionary of the components to add, where keys
1636 are the component names and values are the sequence ranges (or
1637 None to use all residues in the component)."""
1639 for comp, seqrng
in comps.items():
1640 a = self.asym_units[comp]
1641 asyms.append(a
if seqrng
is None else a(*seqrng))
1643 a = ihm.Assembly(asyms, name=name, description=description)
1646 def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1648 """Add a basic FoXS fit. This is largely intended for use from the
1650 assembly = self._get_subassembly(
1652 name=
"SAXS subassembly",
1653 description=
"All components that fit SAXS data")
1654 r = ihm.restraint.SASRestraint(
1655 dataset, assembly, segment=
False,
1656 fitting_method=
'FoXS', fitting_atom_type=
'Heavy atoms',
1657 multi_state=
False, radius_of_gyration=rg, details=details)
1658 r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1659 self.system.restraints.append(r)
1660 self._add_restraint_dataset(r)
1662 def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1663 image_resolution, projection_number,
1664 micrographs_number):
1665 r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1666 image_resolution, projection_number,
1668 self.system.restraints.append(r)
1669 self._add_restraint_dataset(r)
1671 def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1673 r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1674 self.system.restraints.append(r)
1675 self._add_restraint_dataset(r)
1677 def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1678 sigma, pmi_restraint):
1679 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1680 sigma, pmi_restraint, self._xy_plane)
1682 def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1683 sigma, pmi_restraint):
1684 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1685 sigma, pmi_restraint, self._xz_plane)
1687 def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1688 sigma, pmi_restraint):
1689 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1690 sigma, pmi_restraint, self._z_axis)
1692 def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1693 sigma, pmi_restraint, geom):
1694 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1695 r = _GeometricRestraint(
1696 self, state, pmi_restraint, geom, asym.get_feature(ps),
1697 ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1700 self.system.restraints.append(r)
1701 self._add_restraint_dataset(r)
1703 def _get_membrane(self, tor_R, tor_r, tor_th):
1704 """Get an object representing a half-torus membrane"""
1705 if not hasattr(self,
'_seen_membranes'):
1706 self._seen_membranes = {}
1709 membrane_id = tuple(int(x * 100.)
for x
in (tor_R, tor_r, tor_th))
1710 if membrane_id
not in self._seen_membranes:
1711 m = ihm.geometry.HalfTorus(
1712 center=self._center_origin,
1713 transformation=self._identity_transform,
1714 major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1715 inner=
True, name=
'Membrane')
1716 self._seen_membranes[membrane_id] = m
1717 return self._seen_membranes[membrane_id]
1719 def add_membrane_surface_location_restraint(
1720 self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1721 self._add_membrane_restraint(
1722 state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1723 ihm.restraint.UpperBoundDistanceRestraint(0.))
1725 def add_membrane_exclusion_restraint(
1726 self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1727 self._add_membrane_restraint(
1728 state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1729 ihm.restraint.LowerBoundDistanceRestraint(0.))
1731 def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1732 sigma, pmi_restraint, rsr):
1733 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1734 r = _GeometricRestraint(
1735 self, state, pmi_restraint,
1736 self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1738 self.system.restraints.append(r)
1739 self._add_restraint_dataset(r)
1741 def add_model(self, group, assembly=None, representation=None):
1742 state = self._last_state
1743 if representation
is None:
1744 representation = self.default_representation
1745 protocol = self.all_protocols.get_last_protocol(state)
1746 m = _Model(state.prot, self, protocol,
1747 assembly
if assembly
else state.modeled_assembly,
1754 """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1755 This returns a list of custom dumpers that can be passed as all or
1756 part of the `dumpers` argument to ihm.dumper.write(). They add
1757 PMI-specific information to mmCIF or BinaryCIF files written out
1759 return [_ReplicaExchangeProtocolDumper]
1763 """Get custom python-ihm handlers for reading PMI data from mmCIF.
1764 This returns a list of custom handlers that can be passed as all or
1765 part of the `handlers` argument to ihm.reader.read(). They read
1766 PMI-specific information from mmCIF or BinaryCIF files read in
1768 return [_ReplicaExchangeProtocolHandler]
1772 """Extract metadata from an EM density GMM file."""
1775 """Extract metadata from `filename`.
1776 @return a dict with key `dataset` pointing to the GMM file and
1777 `number_of_gaussians` to the number of GMMs (or None)"""
1778 loc = ihm.location.InputFileLocation(
1780 details=
"Electron microscopy density map, "
1781 "represented as a Gaussian Mixture Model (GMM)")
1784 loc._allow_duplicates =
True
1785 d = ihm.dataset.EMDensityDataset(loc)
1786 ret = {
'dataset': d,
'number_of_gaussians':
None}
1788 with open(filename)
as fh:
1790 if line.startswith(
'# data_fn: '):
1791 p = ihm.metadata.MRCParser()
1792 fn = line[11:].rstrip(
'\r\n')
1793 dataset = p.parse_file(os.path.join(
1794 os.path.dirname(filename), fn))[
'dataset']
1795 ret[
'dataset'].parents.append(dataset)
1796 elif line.startswith(
'# ncenters: '):
1797 ret[
'number_of_gaussians'] = int(line[12:])
1801 def _trim_unrep_termini(entity, asyms, representations):
1802 """Trim Entity sequence to only cover represented residues.
1804 PDB policy is for amino acid Entity sequences to be polymers (so
1805 they should include any loops or other gaps in the midst of the
1806 sequence) but for the termini to be trimmed of any not-modeled
1807 residues. Here, we modify the Entity sequence to remove any parts
1808 that are not included in any representation. This may change the
1809 numbering if any N-terminal residues are removed, and thus the offset
1810 between PMI and IHM numbering, as both count from 1."""
1812 for rep
in representations:
1814 if seg.asym_unit.entity
is entity:
1815 seg_range = seg.asym_unit.seq_id_range
1816 if rep_range
is None:
1817 rep_range = list(seg_range)
1819 rep_range[0] = min(rep_range[0], seg_range[0])
1820 rep_range[1] = max(rep_range[1], seg_range[1])
1822 if rep_range
is None or rep_range == [1, len(entity.sequence)]:
1828 pmi_offset = int(rep_range[0]) - 1
1829 entity.pmi_offset = pmi_offset
1831 if asym.entity
is entity:
1832 asym.auth_seq_id_map = entity.pmi_offset
1834 entity.sequence = entity.sequence[rep_range[0] - 1:rep_range[1]]
1839 for rep
in representations:
1841 if seg.asym_unit.entity
is entity:
1842 seg_range = seg.asym_unit.seq_id_range
1843 seg.asym_unit.seq_id_range = (seg_range[0] - pmi_offset,
1844 seg_range[1] - pmi_offset)
1845 if seg.starting_model:
1846 model = seg.starting_model
1847 seg_range = model.asym_unit.seq_id_range
1848 model.asym_unit.seq_id_range = \
1849 (seg_range[0] - pmi_offset,
1850 seg_range[1] - pmi_offset)
1851 model.offset = model.offset - pmi_offset
Select non water and non hydrogen atoms.
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
def create_representation
Create a new Representation and return it.
def create_transformed_component
Make a new component that's a transformed copy of another.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def exclude_coordinates
Don't record coordinates for the given domain.
A decorator to associate a particle with a part of a protein/DNA/RNA.
def add_uniprot_reference
Add UniProt accession (if available) to the IHM system.
def finalize_build
Called immediately after the PMI system is built.
Class to encode a modeling protocol as mmCIF.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def parse_file
Extract metadata from filename.
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Extract metadata from an EM density GMM file.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
A single entity in the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Classes to represent data structures used in mmCIF.
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Base class for capturing a modeling protocol.
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Class for easy writing of PDBs, RMFs, and stat files.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
A decorator for a residue.
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Mapping between FASTA one-letter codes and residue types.
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...