1 """@namespace IMP.pmi.mmcif
2 @brief Support for the mmCIF file format.
4 IMP has basic support for writing out files in mmCIF format, for
5 deposition in [PDB-IHM](https://pdb-ihm.org/).
6 mmCIF files are currently generated by creating an
7 IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8 IMP.pmi.representation.Representation object, after which any
9 generated models and metadata are collected and output as mmCIF.
38 import ihm.representation
40 import ihm.cross_linkers
44 def _assign_id(obj, seen_objs, obj_by_id):
45 """Assign a unique ID to obj, and track all ids in obj_by_id."""
46 if obj
not in seen_objs:
47 if not hasattr(obj,
'id'):
49 obj.id = len(obj_by_id)
50 seen_objs[obj] = obj.id
52 obj.id = seen_objs[obj]
55 def _get_by_residue(p):
56 """Determine whether the given particle represents a specific residue
57 or a more coarse-grained object."""
61 class _ComponentMapper:
62 """Map a Particle to a component name"""
63 def __init__(self, prot):
66 self.name =
'cif-output'
67 self.o.dictionary_pdbs[self.name] = self.prot
68 self.o._init_dictchain(self.name, self.prot,
69 multichar_chain=
True, mmcif=
True)
71 def __getitem__(self, p):
72 protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
77 """Map a Particle to an asym_unit"""
78 def __init__(self, simo, prot):
80 self._cm = _ComponentMapper(prot)
81 self._seen_ranges = {}
83 def __getitem__(self, p):
84 protname = self._cm[p]
85 return self.simo.asym_units[protname]
87 def get_feature(self, ps):
88 """Get an ihm.restraint.Feature that covers the given particles"""
96 rng = asym(rind, rind)
100 rng = asym(rinds[0], rinds[-1])
102 raise ValueError(
"Unsupported particle type %s" % str(p))
104 if len(rngs) > 0
and rngs[-1].asym == asym \
105 and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106 rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
113 if hrngs
in self._seen_ranges:
114 return self._seen_ranges[hrngs]
116 feat = ihm.restraint.ResidueFeature(rngs)
117 self._seen_ranges[hrngs] = feat
122 def __init__(self, system):
124 self.modeller_used = self.phyre2_used =
False
125 self.pmi = ihm.Software(
126 name=
"IMP PMI module",
127 version=IMP.pmi.__version__,
128 classification=
"integrative model building",
129 description=
"integrative model building",
130 location=
'https://integrativemodeling.org')
131 self.imp = ihm.Software(
132 name=
"Integrative Modeling Platform (IMP)",
133 version=IMP.__version__,
134 classification=
"integrative model building",
135 description=
"integrative model building",
136 location=
'https://integrativemodeling.org')
139 if hasattr(self.imp,
'citation'):
140 javi =
'Vel\u00e1zquez-Muriel J'
141 self.imp.citation = ihm.Citation(
143 title=
'Putting the pieces together: integrative modeling '
144 'platform software for structure determination of '
145 'macromolecular assemblies',
146 journal=
'PLoS Biol', volume=10, page_range=
'e1001244',
148 authors=[
'Russel D',
'Lasker K',
'Webb B', javi,
'Tjioe E',
149 'Schneidman-Duhovny D',
'Peterson B',
'Sali A'],
150 doi=
'10.1371/journal.pbio.1001244')
151 self.pmi.citation = ihm.Citation(
153 title=
'Modeling Biological Complexes Using Integrative '
154 'Modeling Platform.',
155 journal=
'Methods Mol Biol', volume=2022, page_range=(353, 377),
157 authors=[
'Saltzberg D',
'Greenberg CH',
'Viswanath S',
158 'Chemmama I',
'Webb B',
'Pellarin R',
'Echeverria I',
160 doi=
'10.1007/978-1-4939-9608-7_15')
161 self.system.software.extend([self.pmi, self.imp])
163 def set_modeller_used(self, version, date):
164 if self.modeller_used:
166 self.modeller_used =
True
168 name=
'MODELLER', classification=
'comparative modeling',
169 description=
'Comparative modeling by satisfaction '
170 'of spatial restraints, build ' + date,
171 location=
'https://salilab.org/modeller/', version=version)
172 self.system.software.append(s)
173 if hasattr(s,
'citation'):
174 s.citation = ihm.Citation(
176 title=
'Comparative protein modelling by satisfaction of '
177 'spatial restraints.',
178 journal=
'J Mol Biol', volume=234, page_range=(779, 815),
179 year=1993, authors=[
'Sali A',
'Blundell TL'],
180 doi=
'10.1006/jmbi.1993.1626')
182 def set_phyre2_used(self):
185 self.phyre2_used =
True
187 name=
'Phyre2', classification=
'protein homology modeling',
188 description=
'Protein Homology/analogY Recognition Engine V 2.0',
189 version=
'2.0', location=
'http://www.sbg.bio.ic.ac.uk/~phyre2/')
190 if hasattr(s,
'citation'):
191 s.citation = ihm.Citation(
193 title=
'The Phyre2 web portal for protein modeling, '
194 'prediction and analysis.',
195 journal=
'Nat Protoc', volume=10, page_range=(845, 858),
196 authors=[
'Kelley LA',
'Mezulis S',
'Yates CM',
'Wass MN',
198 year=2015, doi=
'10.1038/nprot.2015.053')
199 self.system.software.append(s)
202 def _get_fragment_is_rigid(fragment):
203 """Determine whether a fragment is modeled rigidly"""
209 class _PDBFragment(ihm.representation.ResidueSegment):
210 """Record details about part of a PDB file used as input
212 def __init__(self, state, component, start, end, pdb_offset,
213 pdbname, chain, hier, asym_unit):
216 asym_unit=asym_unit.pmi_range(start, end),
217 rigid=
None, primitive=
'sphere')
218 self.component, self.start, self.end, self.offset, self.pdbname \
219 = component, start, end, pdb_offset, pdbname
220 self.state, self.chain, self.hier = state, chain, hier
224 if pdbname.endswith(
'.cif'):
225 read_file = IMP.atom.read_mmcif
226 elif pdbname.endswith(
'.bcif'):
227 read_file = IMP.atom.read_bcif
229 read_file = IMP.atom.read_pdb
230 self.starting_hier = read_file(pdbname, state.model, sel)
232 rigid = property(
lambda self: _get_fragment_is_rigid(self),
233 lambda self, val:
None)
235 def combine(self, other):
239 class _BeadsFragment(ihm.representation.FeatureSegment):
240 """Record details about beads used to represent part of a component."""
243 def __init__(self, state, component, start, end, count, hier, asym_unit):
245 asym_unit=asym_unit(start, end), rigid=
None, primitive=
'sphere',
247 self.state, self.component, self.hier = state, component, hier
249 rigid = property(
lambda self: _get_fragment_is_rigid(self),
250 lambda self, val:
None)
252 def combine(self, other):
254 if (type(other) == type(self)
and
255 other.asym_unit.seq_id_range[0]
256 == self.asym_unit.seq_id_range[1] + 1):
257 self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
258 other.asym_unit.seq_id_range[1])
259 self.count += other.count
263 class _AllModelRepresentations:
264 def __init__(self, simo):
268 self.fragments = OrderedDict()
269 self._all_representations = {}
271 def copy_component(self, state, name, original, asym_unit):
272 """Copy all representation for `original` in `state` to `name`"""
275 newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
277 for rep
in self.fragments:
278 if original
in self.fragments[rep]:
279 if name
not in self.fragments[rep]:
280 self.fragments[rep][name] = OrderedDict()
281 self.fragments[rep][name][state] = [
282 copy_frag(f)
for f
in self.fragments[rep][original][state]]
285 first_state = list(self.fragments[rep][name].keys())[0]
286 if state
is first_state:
287 representation = self._all_representations[rep]
288 representation.extend(self.fragments[rep][name][state])
290 def add_fragment(self, state, representation, fragment):
291 """Add a model fragment."""
292 comp = fragment.component
293 id_rep = id(representation)
294 self._all_representations[id_rep] = representation
295 if id_rep
not in self.fragments:
296 self.fragments[id_rep] = OrderedDict()
297 if comp
not in self.fragments[id_rep]:
298 self.fragments[id_rep][comp] = OrderedDict()
299 if state
not in self.fragments[id_rep][comp]:
300 self.fragments[id_rep][comp][state] = []
301 fragments = self.fragments[id_rep][comp][state]
302 if len(fragments) == 0
or not fragments[-1].combine(fragment):
303 fragments.append(fragment)
306 first_state = list(self.fragments[id_rep][comp].keys())[0]
307 if state
is first_state:
308 representation.append(fragment)
312 """Track all datasets generated by PMI and add them to the ihm.System"""
313 def __init__(self, system):
316 self._datasets_by_state = {}
317 self._restraints_by_state = {}
319 def get_all_group(self, state):
320 """Get a DatasetGroup encompassing all datasets so far in this state"""
324 g = ihm.dataset.DatasetGroup(
325 self._datasets_by_state.get(state, [])
326 + [r.dataset
for r
in self._restraints_by_state.get(state, [])
330 def add(self, state, dataset):
331 """Add a new dataset."""
332 self._datasets.append(dataset)
333 if state
not in self._datasets_by_state:
334 self._datasets_by_state[state] = []
335 self._datasets_by_state[state].append(dataset)
338 self.system.orphan_datasets.append(dataset)
342 """Add the dataset for a restraint"""
343 if state
not in self._restraints_by_state:
344 self._restraints_by_state[state] = []
345 self._restraints_by_state[state].append(restraint)
348 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
349 """Restrain to a set of cross-links"""
352 _label_map = {
'wtDSS':
'DSS',
'scDSS':
'DSS',
'scEDC':
'EDC'}
353 _descriptor_map = {
'DSS': ihm.cross_linkers.dss,
354 'EDC': ihm.cross_linkers.edc}
356 def __init__(self, pmi_restraint):
357 self.pmi_restraint = pmi_restraint
360 linker = getattr(self.pmi_restraint,
'linker',
None)
361 label = self.pmi_restraint.label
364 dataset=self.pmi_restraint.dataset,
365 linker=linker
or self._get_chem_descriptor(label))
368 def _get_chem_descriptor(cls, label):
370 label = cls._label_map.get(label, label)
371 if label
not in cls._descriptor_map:
375 d = ihm.ChemDescriptor(label)
376 cls._descriptor_map[label] = d
377 return cls._descriptor_map[label]
379 def _set_psi_sigma(self, model):
382 if model.m != self.pmi_restraint.model:
384 for resolution
in self.pmi_restraint.sigma_dictionary:
385 statname =
'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
386 if model.stats
and statname
in model.stats:
387 sigma = float(model.stats[statname])
388 p = self.pmi_restraint.sigma_dictionary[resolution][0]
389 old_values.append((p, p.get_scale()))
391 for psiindex
in self.pmi_restraint.psi_dictionary:
392 statname =
'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
393 if model.stats
and statname
in model.stats:
394 psi = float(model.stats[statname])
395 p = self.pmi_restraint.psi_dictionary[psiindex][0]
396 old_values.append((p, p.get_scale()))
400 return list(reversed(old_values))
402 def add_fits_from_model_statfile(self, model):
404 old_values = self._set_psi_sigma(model)
408 for xl
in self.cross_links:
410 xl.fits[model] = ihm.restraint.CrossLinkFit(
411 psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
413 for p, scale
in old_values:
417 def __set_dataset(self, val):
418 self.pmi_restraint.dataset = val
419 dataset = property(
lambda self: self.pmi_restraint.dataset,
423 def get_asym_mapper_for_state(simo, state, asym_map):
424 asym = asym_map.get(state,
None)
426 asym = _AsymMapper(simo, state.prot)
427 asym_map[state] = asym
434 psi = property(
lambda self: self.psi_p.get_scale(),
435 lambda self, val:
None)
436 sigma1 = property(
lambda self: self.sigma1_p.get_scale(),
437 lambda self, val:
None)
438 sigma2 = property(
lambda self: self.sigma2_p.get_scale(),
439 lambda self, val:
None)
442 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
446 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
450 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
451 def __init__(self, state, pmi_restraint, image_number, resolution,
452 pixel_size, image_resolution, projection_number,
454 self.pmi_restraint, self.image_number = pmi_restraint, image_number
456 dataset=pmi_restraint.datasets[image_number],
457 assembly=state.modeled_assembly,
458 segment=
False, number_raw_micrographs=micrographs_number,
459 pixel_size_width=pixel_size, pixel_size_height=pixel_size,
460 image_resolution=image_resolution,
461 number_of_projections=projection_number)
464 def __get_dataset(self):
465 return self.pmi_restraint.datasets[self.image_number]
467 def __set_dataset(self, val):
468 self.pmi_restraint.datasets[self.image_number] = val
470 dataset = property(__get_dataset, __set_dataset)
472 def add_fits_from_model_statfile(self, model):
473 ccc = self._get_cross_correlation(model)
474 transform = self._get_transformation(model)
475 rot = transform.get_rotation()
476 rm = [[e
for e
in rot.get_rotation_matrix_row(i)]
for i
in range(3)]
477 self.fits[model] = ihm.restraint.EM2DRestraintFit(
478 cross_correlation_coefficient=ccc,
480 tr_vector=transform.get_translation())
482 def _get_transformation(self, model):
483 """Get the transformation that places the model on the image"""
484 stats = model.em2d_stats
or model.stats
485 prefix =
'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
486 self.image_number + 1)
487 r = [float(stats[prefix +
'_Rotation%d' % i])
for i
in range(4)]
488 t = [float(stats[prefix +
'_Translation%d' % i])
492 inv = model.transform.get_inverse()
496 def _get_cross_correlation(self, model):
497 """Get the cross correlation coefficient between the model projection
499 stats = model.em2d_stats
or model.stats
500 return float(stats[
'ElectronMicroscopy2D_%s_Image%d_CCC'
501 % (self.pmi_restraint.label,
502 self.image_number + 1)])
505 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
507 def __init__(self, simo, state, pmi_restraint, target_ps, densities):
508 self.pmi_restraint = pmi_restraint
510 dataset=pmi_restraint.dataset,
511 assembly=self._get_assembly(densities, simo, state),
512 fitting_method=
'Gaussian mixture models',
513 number_of_gaussians=len(target_ps))
516 def __set_dataset(self, val):
517 self.pmi_restraint.dataset = val
518 dataset = property(
lambda self: self.pmi_restraint.dataset,
521 def _get_assembly(self, densities, simo, state):
522 """Get the Assembly that this restraint acts on"""
523 cm = _ComponentMapper(state.prot)
526 components[cm[d]] =
None
527 a = simo._get_subassembly(
528 components, name=
"EM subassembly",
529 description=
"All components that fit the EM map")
532 def add_fits_from_model_statfile(self, model):
533 ccc = self._get_cross_correlation(model)
534 self.fits[model] = ihm.restraint.EM3DRestraintFit(
535 cross_correlation_coefficient=ccc)
537 def _get_cross_correlation(self, model):
538 """Get the cross correlation coefficient between the model
540 if model.stats
is not None:
541 return float(model.stats[
'GaussianEMRestraint_%s_CCC'
542 % self.pmi_restraint.label])
545 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
547 def __init__(self, simo, state, pmi_restraint, geometric_object,
548 feature, distance, sigma):
549 self.pmi_restraint = pmi_restraint
551 dataset=pmi_restraint.dataset,
552 geometric_object=geometric_object, feature=feature,
553 distance=distance, harmonic_force_constant=1. / sigma,
557 def __set_dataset(self, val):
558 self.pmi_restraint.dataset = val
559 dataset = property(
lambda self: self.pmi_restraint.dataset,
563 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
564 def __init__(self, state, rex):
565 if rex.monte_carlo_sample_objects
is not None:
566 method =
'Replica exchange monte carlo'
568 method =
'Replica exchange molecular dynamics'
569 self.monte_carlo_temperature = rex.vars[
'monte_carlo_temperature']
570 self.replica_exchange_minimum_temperature = \
571 rex.vars[
'replica_exchange_minimum_temperature']
572 self.replica_exchange_maximum_temperature = \
573 rex.vars[
'replica_exchange_maximum_temperature']
575 assembly=state.modeled_assembly,
577 method=method, name=
'Sampling',
578 num_models_begin=
None,
579 num_models_end=rex.vars[
"number_of_frames"],
580 multi_scale=
True, multi_state=
False, ordered=
False, ensemble=
True)
583 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
584 """Write IMP-specific information about replica exchange to mmCIF.
585 Note that IDs will have already been assigned by python-ihm's
586 standard modeling protocol dumper."""
587 def dump(self, system, writer):
588 with writer.loop(
"_imp_replica_exchange_protocol",
589 [
"protocol_id",
"step_id",
"monte_carlo_temperature",
590 "replica_exchange_minimum_temperature",
591 "replica_exchange_maximum_temperature"])
as lp:
592 for p
in system._all_protocols():
594 if isinstance(s, _ReplicaExchangeProtocolStep):
595 self._dump_step(p, s, lp)
597 def _dump_step(self, p, s, lp):
598 mintemp = s.replica_exchange_minimum_temperature
599 maxtemp = s.replica_exchange_maximum_temperature
600 lp.write(protocol_id=p._id, step_id=s._id,
601 monte_carlo_temperature=s.monte_carlo_temperature,
602 replica_exchange_minimum_temperature=mintemp,
603 replica_exchange_maximum_temperature=maxtemp)
606 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
607 category =
'_imp_replica_exchange_protocol'
609 """Read IMP-specific information about replica exchange from mmCIF."""
610 def __call__(self, protocol_id, step_id, monte_carlo_temperature,
611 replica_exchange_minimum_temperature,
612 replica_exchange_maximum_temperature):
613 p = self.sysr.protocols.get_by_id(protocol_id)
615 s = p.steps[int(step_id)-1]
617 s.__class__ = _ReplicaExchangeProtocolStep
618 s.monte_carlo_temperature = \
619 self.get_float(monte_carlo_temperature)
620 s.replica_exchange_minimum_temperature = \
621 self.get_float(replica_exchange_minimum_temperature)
622 s.replica_exchange_maximum_temperature = \
623 self.get_float(replica_exchange_maximum_temperature)
626 class _SimpleProtocolStep(ihm.protocol.Step):
627 def __init__(self, state, num_models_end, method):
629 assembly=state.modeled_assembly,
631 method=method, name=
'Sampling',
632 num_models_begin=
None,
633 num_models_end=num_models_end,
634 multi_scale=
True, multi_state=
False, ordered=
False,
639 """Represent a single chain in a Model"""
640 def __init__(self, pmi_chain_id, asym_unit):
641 self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
645 def add(self, xyz, atom_type, residue_type, residue_index,
646 all_indexes, radius):
647 if atom_type
is None:
648 self.spheres.append((xyz, residue_type, residue_index,
649 all_indexes, radius))
651 self.atoms.append((xyz, atom_type, residue_type, residue_index,
652 all_indexes, radius))
653 orig_comp = property(
lambda self: self.comp)
656 class _TransformedChain:
657 """Represent a chain that is a transformed version of another"""
658 def __init__(self, orig_chain, asym_unit, transform):
659 self.orig_chain, self.asym_unit = orig_chain, asym_unit
660 self.transform = transform
662 def __get_spheres(self):
663 for (xyz, residue_type, residue_index, all_indexes,
664 radius)
in self.orig_chain.spheres:
665 yield (self.transform * xyz, residue_type, residue_index,
667 spheres = property(__get_spheres)
669 def __get_atoms(self):
670 for (xyz, atom_type, residue_type, residue_index, all_indexes,
671 radius)
in self.orig_chain.atoms:
672 yield (self.transform * xyz, atom_type, residue_type,
673 residue_index, all_indexes, radius)
674 atoms = property(__get_atoms)
676 entity = property(
lambda self: self.orig_chain.entity)
677 orig_comp = property(
lambda self: self.orig_chain.comp)
681 def __init__(self, component, simo):
682 self._seqranges = simo._exclude_coords.get(component, [])
684 def is_excluded(self, indexes):
685 """Return True iff the given sequence range is excluded."""
686 for seqrange
in self._seqranges:
687 if indexes[0] >= seqrange[0]
and indexes[-1] <= seqrange[1]:
691 class _Model(ihm.model.Model):
692 def __init__(self, prot, simo, protocol, assembly, representation):
693 super().__init__(assembly=assembly, protocol=protocol,
694 representation=representation)
695 self.simo = weakref.proxy(simo)
701 self.em2d_stats =
None
704 self._is_restrained =
True
707 self.m = prot.get_model()
708 o.dictionary_pdbs[name] = prot
709 o._init_dictchain(name, prot, multichar_chain=
True)
710 (particle_infos_for_pdb,
711 self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
713 self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
716 def all_chains(self, simo):
717 """Yield all chains, including transformed ones"""
719 for c
in self.chains:
721 chain_for_comp[c.comp] = c
722 for tc
in simo._transformed_components:
723 orig_chain = chain_for_comp.get(tc.original,
None)
725 asym = simo.asym_units[tc.name]
726 c = _TransformedChain(orig_chain, asym, tc.transform)
730 def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
731 entity_for_chain = {}
734 for protname, chain_id
in o.dictchain[name].items():
735 if protname
in simo.entities:
736 entity_for_chain[chain_id] = simo.entities[protname]
739 pn = protname.split(
'.')[0]
740 entity_for_chain[chain_id] = simo.entities[pn]
741 comp_for_chain[chain_id] = protname
745 correct_asym[chain_id] = simo.asym_units[protname]
752 for (xyz, atom_type, residue_type, chain_id, residue_index,
753 all_indexes, radius)
in particle_infos_for_pdb:
754 if chain
is None or chain.pmi_chain_id != chain_id:
755 chain = _Chain(chain_id, correct_asym[chain_id])
756 chain.entity = entity_for_chain[chain_id]
757 chain.comp = comp_for_chain[chain_id]
758 self.chains.append(chain)
759 excluder = _Excluder(chain.comp, simo)
760 if not excluder.is_excluded(all_indexes
if all_indexes
761 else [residue_index]):
762 chain.add(xyz, atom_type, residue_type, residue_index,
765 def parse_rmsf_file(self, fname, component):
766 self.rmsf[component] = rmsf = {}
767 with open(str(fname))
as fh:
769 resnum, blocknum, val = line.split()
770 rmsf[int(resnum)] = (int(blocknum), float(val))
772 def get_rmsf(self, component, indexes):
773 """Get the RMSF value for the given residue indexes."""
776 rmsf = self.rmsf[component]
777 blocknums = dict.fromkeys(rmsf[ind][0]
for ind
in indexes)
778 if len(blocknums) != 1:
779 raise ValueError(
"Residue indexes %s aren't all in the same block"
781 return rmsf[indexes[0]][1]
784 for chain
in self.all_chains(self.simo):
785 pmi_offset = chain.asym_unit.entity.pmi_offset
786 for atom
in chain.atoms:
787 (xyz, atom_type, residue_type, residue_index,
788 all_indexes, radius) = atom
789 pt = self.transform * xyz
790 yield ihm.model.Atom(
791 asym_unit=chain.asym_unit,
792 seq_id=residue_index - pmi_offset,
793 atom_id=atom_type.get_string(),
795 x=pt[0], y=pt[1], z=pt[2])
797 def get_spheres(self):
798 for chain
in self.all_chains(self.simo):
799 pmi_offset = chain.asym_unit.entity.pmi_offset
800 for sphere
in chain.spheres:
801 (xyz, residue_type, residue_index,
802 all_indexes, radius) = sphere
803 if all_indexes
is None:
804 all_indexes = (residue_index,)
805 pt = self.transform * xyz
806 yield ihm.model.Sphere(
807 asym_unit=chain.asym_unit,
808 seq_id_range=(all_indexes[0] - pmi_offset,
809 all_indexes[-1] - pmi_offset),
810 x=pt[0], y=pt[1], z=pt[2], radius=radius,
811 rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
815 def __init__(self, simo):
816 self.simo = weakref.proxy(simo)
818 self.protocols = OrderedDict()
820 def add_protocol(self, state):
821 """Add a new Protocol"""
822 if state
not in self.protocols:
823 self.protocols[state] = []
824 p = ihm.protocol.Protocol()
825 self.simo.system.orphan_protocols.append(p)
826 self.protocols[state].append(p)
828 def add_step(self, step, state):
829 """Add a ProtocolStep to the last Protocol of the given State"""
830 if state
not in self.protocols:
831 self.add_protocol(state)
832 protocol = self.get_last_protocol(state)
833 if len(protocol.steps) == 0:
834 step.num_models_begin = 0
836 step.num_models_begin = protocol.steps[-1].num_models_end
837 protocol.steps.append(step)
838 step.id = len(protocol.steps)
840 step.dataset_group = self.simo.all_datasets.get_all_group(state)
842 def add_postproc(self, step, state):
843 """Add a postprocessing step to the last protocol"""
844 protocol = self.get_last_protocol(state)
845 if len(protocol.analyses) == 0:
846 protocol.analyses.append(ihm.analysis.Analysis())
847 protocol.analyses[-1].steps.append(step)
849 def get_last_protocol(self, state):
850 """Return the most recently-added _Protocol"""
851 return self.protocols[state][-1]
854 class _AllStartingModels:
855 def __init__(self, simo):
859 self.models = OrderedDict()
862 def add_pdb_fragment(self, fragment):
863 """Add a starting model PDB fragment."""
864 comp = fragment.component
865 state = fragment.state
866 if comp
not in self.models:
867 self.models[comp] = OrderedDict()
868 if state
not in self.models[comp]:
869 self.models[comp][state] = []
870 models = self.models[comp][state]
871 if len(models) == 0 \
872 or models[-1].fragments[0].pdbname != fragment.pdbname:
873 model = self._add_model(fragment)
877 models[-1].fragments.append(weakref.proxy(fragment))
881 pmi_offset = models[-1].asym_unit.entity.pmi_offset
882 sid_begin = min(fragment.start - pmi_offset,
883 models[-1].asym_unit.seq_id_range[0])
884 sid_end = max(fragment.end - pmi_offset,
885 models[-1].asym_unit.seq_id_range[1])
886 models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
887 fragment.starting_model = models[-1]
889 def _add_model(self, f):
890 if (hasattr(ihm.metadata,
'CIFParser')
891 and f.pdbname.endswith(
'.cif')):
892 parser = ihm.metadata.CIFParser()
893 elif (hasattr(ihm.metadata,
'BinaryCIFParser')
894 and f.pdbname.endswith(
'.bcif')):
895 parser = ihm.metadata.BinaryCIFParser()
897 parser = ihm.metadata.PDBParser()
898 r = parser.parse_file(f.pdbname)
900 self.simo._add_dataset(r[
'dataset'])
902 templates = r[
'templates'].get(f.chain, [])
905 self.simo.system.locations.append(t.alignment_file)
907 self.simo._add_dataset(t.dataset)
908 source = r.get(
'entity_source', {}).get(f.chain)
910 f.asym_unit.entity.source = source
911 pmi_offset = f.asym_unit.entity.pmi_offset
913 asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
914 dataset=r[
'dataset'], asym_id=f.chain,
915 templates=templates, offset=f.offset + pmi_offset,
916 metadata=r.get(
'metadata'),
917 software=r[
'software'][0]
if r[
'software']
else None,
918 script_file=r[
'script'])
919 m.fragments = [weakref.proxy(f)]
923 class _StartingModel(ihm.startmodel.StartingModel):
924 def get_seq_dif(self):
928 pmi_offset = self.asym_unit.entity.pmi_offset
929 mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
931 for f
in self.fragments:
934 residue_indexes=list(range(f.start - f.offset,
935 f.end - f.offset + 1)))
936 for a
in mh.get_ihm_atoms(sel.get_selected_particles(),
937 f.offset - pmi_offset):
939 self._seq_dif = mh._seq_dif
942 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
943 """Post processing using AnalysisReplicaExchange0 macro"""
945 def __init__(self, rex, num_models_begin):
948 for fname
in self.get_all_stat_files():
949 with open(str(fname))
as fh:
950 num_models_end += len(fh.readlines())
952 feature=
'RMSD', num_models_begin=num_models_begin,
953 num_models_end=num_models_end)
955 def get_stat_file(self, cluster_num):
956 return self.rex._outputdir / (
"cluster.%d" % cluster_num) /
'stat.out'
958 def get_all_stat_files(self):
959 for i
in range(self.rex._number_of_clusters):
960 yield self.get_stat_file(i)
963 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
964 """Ensemble generated using AnalysisReplicaExchange0 macro"""
966 num_models_deposited =
None
968 def __init__(self, pp, cluster_num, model_group, num_deposit):
969 with open(str(pp.get_stat_file(cluster_num)))
as fh:
970 num_models = len(fh.readlines())
972 num_models=num_models,
973 model_group=model_group, post_process=pp,
974 clustering_feature=pp.feature,
975 name=model_group.name)
976 self.cluster_num = cluster_num
977 self.num_models_deposited = num_deposit
979 def get_rmsf_file(self, component):
980 return (self.post_process.rex._outputdir
981 / (
'cluster.%d' % self.cluster_num)
982 / (
'rmsf.%s.dat' % component))
984 def load_rmsf(self, model, component):
985 fname = self.get_rmsf_file(component)
987 model.parse_rmsf_file(fname, component)
989 def get_localization_density_file(self, fname):
990 return (self.post_process.rex._outputdir
991 / (
'cluster.%d' % self.cluster_num)
992 / (
'%s.mrc' % fname))
994 def load_localization_density(self, state, fname, select_tuple,
996 fullpath = self.get_localization_density_file(fname)
997 if fullpath.exists():
998 details =
"Localization density for %s %s" \
999 % (fname, self.model_group.name)
1000 local_file = ihm.location.OutputFileLocation(str(fullpath),
1002 for s
in select_tuple:
1003 if isinstance(s, tuple)
and len(s) == 3:
1004 asym = asym_units[s[2]].pmi_range(s[0], s[1])
1006 asym = asym_units[s]
1007 den = ihm.model.LocalizationDensity(file=local_file,
1009 self.densities.append(den)
1011 def load_all_models(self, simo, state):
1012 stat_fname = self.post_process.get_stat_file(self.cluster_num)
1014 with open(str(stat_fname))
as fh:
1015 stats = ast.literal_eval(fh.readline())
1017 rmf_file = stat_fname.parent / (
"%d.rmf3" % model_num)
1019 if rmf_file.exists():
1020 rh = RMF.open_rmf_file_read_only(str(rmf_file))
1021 system = state._pmi_object.system
1027 if model_num >= self.num_models_deposited:
1031 def _get_precision(self):
1032 precfile = (self.post_process.rex._outputdir /
1033 (
"precision.%d.%d.out" % (self.cluster_num,
1035 if not precfile.exists():
1039 r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
1041 with open(str(precfile))
as fh:
1045 return float(m.group(1))
1047 precision = property(
lambda self: self._get_precision(),
1048 lambda self, val:
None)
1051 class _SimpleEnsemble(ihm.model.Ensemble):
1052 """Simple manually-created ensemble"""
1054 num_models_deposited =
None
1056 def __init__(self, pp, model_group, num_models, drmsd,
1057 num_models_deposited, ensemble_file):
1059 model_group=model_group, post_process=pp, num_models=num_models,
1060 file=ensemble_file, precision=drmsd, name=model_group.name,
1061 clustering_feature=
'dRMSD')
1062 self.num_models_deposited = num_models_deposited
1064 def load_localization_density(self, state, component, asym, local_file):
1065 den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1066 self.densities.append(den)
1069 class _CustomDNAAlphabet:
1070 """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1071 as in python-ihm)"""
1072 _comps = dict([cc.code_canonical, cc]
1073 for cc
in ihm.DNAAlphabet._comps.values())
1076 class _EntityMapper(dict):
1077 """Handle mapping from IMP components (without copy number) to CIF
1078 entities. Multiple components may map to the same entity if they
1080 def __init__(self, system):
1082 self._sequence_dict = {}
1084 self.system = system
1086 def _get_alphabet(self, alphabet):
1087 """Map a PMI alphabet to an IHM alphabet"""
1091 alphabet_map = {
None: ihm.LPeptideAlphabet,
1092 IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1093 IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1094 IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1095 if alphabet
in alphabet_map:
1096 return alphabet_map[alphabet]
1098 raise TypeError(
"Don't know how to handle %s" % alphabet)
1100 def add(self, component_name, sequence, offset, alphabet, uniprot):
1101 def entity_seq(sequence):
1104 return [
'UNK' if s ==
'X' else s
for s
in sequence]
1107 if sequence
not in self._sequence_dict:
1110 d = component_name.split(
"@")[0].split(
".")[0]
1111 entity = Entity(entity_seq(sequence), description=d,
1113 alphabet=self._get_alphabet(alphabet),
1115 self.system.entities.append(entity)
1116 self._sequence_dict[sequence] = entity
1117 self[component_name] = self._sequence_dict[sequence]
1120 class _TransformedComponent:
1121 def __init__(self, name, original, transform):
1122 self.name, self.original, self.transform = name, original, transform
1126 """Class with similar interface to weakref.ref, but keeps a strong ref"""
1127 def __init__(self, ref):
1134 class _State(ihm.model.State):
1135 """Representation of a single state in the system."""
1137 def __init__(self, pmi_object, po):
1142 self._pmi_object = weakref.proxy(pmi_object)
1143 if hasattr(pmi_object,
'state'):
1146 self._pmi_state = _SimpleRef(pmi_object.state)
1148 self._pmi_state = weakref.ref(pmi_object)
1150 old_name = self.name
1151 super().__init__(experiment_type=
'Fraction of bulk')
1152 self.name = old_name
1156 self.modeled_assembly = ihm.Assembly(
1157 name=
"Modeled assembly",
1158 description=self.get_postfixed_name(
1159 "All components modeled by IMP"))
1160 po.system.orphan_assemblies.append(self.modeled_assembly)
1162 self.all_modeled_components = []
1165 return hash(self._pmi_state())
1167 def __eq__(self, other):
1168 return self._pmi_state() == other._pmi_state()
1170 def add_model_group(self, group):
1174 def get_prefixed_name(self, name):
1175 """Prefix the given name with the state name, if available."""
1177 return self.short_name +
' ' + name
1181 return name[0].upper() + name[1:]
if name
else ''
1183 def get_postfixed_name(self, name):
1184 """Postfix the given name with the state name, if available."""
1186 return "%s in state %s" % (name, self.short_name)
1190 short_name = property(
lambda self: self._pmi_state().short_name)
1191 long_name = property(
lambda self: self._pmi_state().long_name)
1193 def __get_name(self):
1194 return self._pmi_state().long_name
1196 def __set_name(self, val):
1197 self._pmi_state().long_name = val
1199 name = property(__get_name, __set_name)
1203 """A single entity in the system. This contains information (such as
1204 database identifiers) specific to a particular sequence rather than
1205 a copy (for example, when modeling a homodimer, two AsymUnits will
1206 point to the same Entity).
1208 This functions identically to the base ihm.Entity class, but it
1209 allows identifying residues by either the PMI numbering scheme
1210 (which is always contiguous starting at 1, covering the entire
1211 sequence in the FASTA files, or the IHM scheme (seq_id, which also
1212 starts at 1, but which only covers the modeled subset of the full
1213 sequence, with non-modeled N-terminal or C-terminal residues
1214 removed). The actual offset (which is the integer to be added to the
1215 IHM numbering to get PMI numbering, or equivalently the number of
1216 not-represented N-terminal residues in the PMI sequence) is
1217 available in the `pmi_offset` member.
1219 If a UniProt accession was provided for the sequence (either when
1220 State.create_molecule() was called, or in the FASTA alignment file
1221 header) then that is available in the `uniprot` member, and can be
1222 added to the IHM system with the add_uniprot_reference method.
1224 def __init__(self, sequence, pmi_offset, uniprot, *args, **keys):
1227 self.pmi_offset = pmi_offset
1228 self.uniprot = uniprot
1229 super().__init__(sequence, *args, **keys)
1232 """Return a single IHM residue indexed using PMI numbering"""
1233 return self.residue(res_id - self.pmi_offset)
1236 """Return a range of IHM residues indexed using PMI numbering"""
1237 off = self.pmi_offset
1238 return self(res_id_begin - off, res_id_end - off)
1241 """Add UniProt accession (if available) to the IHM system.
1242 If a UniProt accession was provided for the sequence (either when
1243 State.create_molecule() was called, or in the FASTA alignment file
1244 header), then look this up at the UniProt web site (requires
1245 network access) to get full information, and add it to the IHM
1246 system. The resulting reference object is returned. If the IMP
1247 and UniProt sequences are not identical, then this object may
1248 need to be modified by specifying an alignment and/or
1249 single-point mutations.
1252 print(
'Adding UniProt accession %s reference for entity %s'
1253 % (self.uniprot, self.description))
1254 ref = ihm.reference.UniProtSequence.from_accession(self.uniprot)
1255 self.references.append(ref)
1260 """A single asymmetric unit in the system. This roughly corresponds to
1261 a single PMI subunit (sequence, copy, or clone).
1263 This functions identically to the base ihm.AsymUnit class, but it
1264 allows identifying residues by either the PMI numbering scheme
1265 (which is always contiguous starting at 1, covering the entire
1266 sequence in the FASTA files, or the IHM scheme (seq_id, which also
1267 starts at 1, but which only covers the modeled subset of the full
1268 sequence, with non-modeled N-terminal or C-terminal residues
1271 The `entity` member of this class points to an Entity object, which
1272 contains information (such as database identifiers) specific to
1273 a particular sequence rather than a copy (for example, when modeling
1274 a homodimer, two AsymUnits will point to the same Entity).
1277 def __init__(self, entity, *args, **keys):
1279 entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1282 """Return a single IHM residue indexed using PMI numbering"""
1283 return self.residue(res_id - self.entity.pmi_offset)
1286 """Return a range of IHM residues indexed using PMI numbering"""
1287 off = self.entity.pmi_offset
1288 return self(res_id_begin - off, res_id_end - off)
1292 """Class to encode a modeling protocol as mmCIF.
1294 IMP has basic support for writing out files in mmCIF format, for
1295 deposition in [PDB-IHM](https://pdb-ihm.org/).
1296 After creating an instance of this class, attach it to an
1297 IMP.pmi.topology.System object. After this, any
1298 generated models and metadata are automatically collected in the
1299 `system` attribute, which is an
1300 [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1301 Once the protocol is complete, call finalize() to make sure `system`
1302 contains everything, then use the
1303 [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1304 to write out files in mmCIF or BinaryCIF format.
1306 Each PMI subunit will be mapped to an IHM AsymUnit class which contains the
1307 subset of the sequence that was represented. Use the `asym_units` dict to
1308 get this object given a PMI subunit name. Each unique sequence will be
1309 mapped to an IHM Entity class (for example when modeling a homodimer
1310 there will be two AsymUnits which both point to the same Entity). Use
1311 the `entities` dict to get this object from a PMI subunit name.
1313 See also Entity, AsymUnit, get_handlers(), get_dumpers().
1317 self.system = ihm.System()
1318 self._state_group = ihm.model.StateGroup()
1319 self.system.state_groups.append(self._state_group)
1321 self._state_ensemble_offset = 0
1322 self._main_script = os.path.abspath(sys.argv[0])
1325 loc = ihm.location.WorkflowFileLocation(
1326 path=self._main_script,
1327 details=
"The main integrative modeling script")
1328 self.system.locations.append(loc)
1331 self.__asym_states = {}
1332 self._working_directory = os.getcwd()
1334 "Default representation")
1335 self.entities = _EntityMapper(self.system)
1337 self.asym_units = {}
1338 self._all_components = {}
1339 self.all_modeled_components = []
1340 self._transformed_components = []
1341 self.sequence_dict = {}
1344 self._xy_plane = ihm.geometry.XYPlane()
1345 self._xz_plane = ihm.geometry.XZPlane()
1346 self._z_axis = ihm.geometry.ZAxis()
1347 self._center_origin = ihm.geometry.Center(0, 0, 0)
1348 self._identity_transform = ihm.geometry.Transformation.identity()
1351 self._exclude_coords = {}
1353 self.all_representations = _AllModelRepresentations(self)
1354 self.all_protocols = _AllProtocols(self)
1355 self.all_datasets = _AllDatasets(self.system)
1356 self.all_starting_models = _AllStartingModels(self)
1358 self.all_software = _AllSoftware(self.system)
1361 """Create a new Representation and return it. This can be
1362 passed to add_model(), add_bead_element() or add_pdb_element()."""
1363 r = ihm.representation.Representation(name=name)
1364 self.system.orphan_representations.append(r)
1368 """Don't record coordinates for the given domain.
1369 Coordinates for the given domain (specified by a component name
1370 and a 2-element tuple giving the start and end residue numbers)
1371 will be excluded from the mmCIF file. This can be used to exclude
1372 parts of the structure that weren't well resolved in modeling.
1373 Any bead or residue that lies wholly within this range will be
1374 excluded. Multiple ranges for a given component can be excluded
1375 by calling this method multiple times."""
1376 if component
not in self._exclude_coords:
1377 self._exclude_coords[component] = []
1378 self._exclude_coords[component].append(seqrange)
1380 def _is_excluded(self, component, start, end):
1381 """Return True iff this chunk of sequence should be excluded"""
1382 for seqrange
in self._exclude_coords.get(component, ()):
1383 if start >= seqrange[0]
and end <= seqrange[1]:
1386 def _add_state(self, state):
1387 """Create a new state and return a pointer to it."""
1388 self._state_ensemble_offset = len(self.system.ensembles)
1389 s = _State(state, self)
1390 self._state_group.append(s)
1391 self._last_state = s
1394 def _get_chain_for_component(self, name, output):
1395 """Get the chain ID for a component, if any."""
1397 if name
in self.asym_units:
1398 return self.asym_units[name]._id
1403 def _get_assembly_comps(self, assembly):
1404 """Get the names of the components in the given assembly"""
1408 comps[ca.details] =
None
1412 """Make a new component that's a transformed copy of another.
1413 All representation for the existing component is copied to the
1415 assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1416 if name
in assembly_comps:
1417 raise ValueError(
"Component %s already exists" % name)
1418 elif original
not in assembly_comps:
1419 raise ValueError(
"Original component %s does not exist" % original)
1420 self.create_component(state, name,
True)
1421 self.add_component_sequence(state, name, self.sequence_dict[original])
1422 self._transformed_components.append(_TransformedComponent(
1423 name, original, transform))
1424 self.all_representations.copy_component(state, name, original,
1425 self.asym_units[name])
1427 def create_component(self, state, name, modeled, asym_name=None):
1428 if asym_name
is None:
1430 new_comp = name
not in self._all_components
1431 self._all_components[name] =
None
1433 state.all_modeled_components.append(name)
1434 if asym_name
not in self.asym_units:
1436 self.asym_units[asym_name] =
None
1438 self.all_modeled_components.append(name)
1440 def add_component_sequence(self, state, name, seq, asym_name=None,
1441 alphabet=
None, uniprot=
None):
1442 if asym_name
is None:
1445 if name
in self.sequence_dict:
1446 if self.sequence_dict[name] != seq:
1447 raise ValueError(
"Sequence mismatch for component %s" % name)
1449 self.sequence_dict[name] = seq
1453 self.entities.add(name, seq, 0, alphabet, uniprot)
1454 if asym_name
in self.asym_units:
1455 if self.asym_units[asym_name]
is None:
1457 entity = self.entities[name]
1458 asym =
AsymUnit(entity, details=asym_name)
1459 self.system.asym_units.append(asym)
1460 self.asym_units[asym_name] = asym
1461 state.modeled_assembly.append(self.asym_units[asym_name])
1464 """Called immediately after the PMI system is built"""
1465 for entity
in self.system.entities:
1466 _trim_unrep_termini(entity, self.system.asym_units,
1467 self.system.orphan_representations)
1469 def _add_restraint_model_fits(self):
1470 """Add fits to restraints for all known models"""
1471 for group, m
in self.system._all_models():
1472 if m._is_restrained:
1473 for r
in self.system.restraints:
1474 if hasattr(r,
'add_fits_from_model_statfile'):
1475 r.add_fits_from_model_statfile(m)
1478 """Do any final processing on the class hierarchy.
1479 After calling this method, the `system` member (an instance
1480 of `ihm.System`) completely reproduces the PMI modeling, and
1481 can be written out to an mmCIF file with `ihm.dumper.write`,
1482 and/or modified using the ihm API."""
1483 self._add_restraint_model_fits()
1485 def add_pdb_element(self, state, name, start, end, offset, pdbname,
1486 chain, hier, representation=
None):
1487 if self._is_excluded(name, start, end):
1489 if representation
is None:
1490 representation = self.default_representation
1491 asym = self.asym_units[name]
1492 p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1494 self.all_representations.add_fragment(state, representation, p)
1495 self.all_starting_models.add_pdb_fragment(p)
1497 def add_bead_element(self, state, name, start, end, num, hier,
1498 representation=
None):
1499 if self._is_excluded(name, start, end):
1501 if representation
is None:
1502 representation = self.default_representation
1503 asym = self.asym_units[name]
1504 pmi_offset = asym.entity.pmi_offset
1505 b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1507 self.all_representations.add_fragment(state, representation, b)
1509 def get_cross_link_group(self, pmi_restraint):
1510 r = _CrossLinkRestraint(pmi_restraint)
1511 self.system.restraints.append(r)
1512 self._add_restraint_dataset(r)
1515 def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1516 if c1
not in self._all_components
or c2
not in self._all_components:
1522 e1 = self.entities[c1]
1523 e2 = self.entities[c2]
1524 xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1525 residue2=e2.pmi_residue(r2))
1526 rsr.experimental_cross_links.append([xl])
1529 def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1532 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1533 d = ihm.restraint.UpperBoundDistanceRestraint(length)
1535 if _get_by_residue(p1)
and _get_by_residue(p2):
1536 cls = _ResidueCrossLink
1538 cls = _FeatureCrossLink
1539 xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1542 xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1543 rsr.cross_links.append(xl)
1545 def add_replica_exchange(self, state, rex):
1550 step = _ReplicaExchangeProtocolStep(state, rex)
1551 step.software = self.all_software.pmi
1552 self.all_protocols.add_step(step, state)
1554 def _add_simple_dynamics(self, num_models_end, method):
1556 state = self._last_state
1557 self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1560 def _add_protocol(self):
1562 state = self._last_state
1563 self.all_protocols.add_protocol(state)
1565 def _add_dataset(self, dataset):
1566 return self.all_datasets.add(self._last_state, dataset)
1568 def _add_restraint_dataset(self, restraint):
1569 return self.all_datasets.add_restraint(self._last_state, restraint)
1571 def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1573 state = self._last_state
1574 pp = ihm.analysis.ClusterStep(
'RMSD', num_models_begin, num_models_end)
1575 self.all_protocols.add_postproc(pp, state)
1578 def _add_no_postprocessing(self, num_models):
1580 state = self._last_state
1581 pp = ihm.analysis.EmptyStep()
1582 pp.num_models_begin = pp.num_models_end = num_models
1583 self.all_protocols.add_postproc(pp, state)
1586 def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1587 num_models_deposited, localization_densities,
1589 """Add an ensemble generated by ad hoc methods (not using PMI).
1590 This is currently only used by the Nup84 system."""
1592 state = self._last_state
1593 group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1594 state.add_model_group(group)
1596 self.system.locations.append(ensemble_file)
1597 e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1599 self.system.ensembles.append(e)
1600 for c
in state.all_modeled_components:
1601 den = localization_densities.get(c,
None)
1603 e.load_localization_density(state, c, self.asym_units[c], den)
1607 """Point a previously-created ensemble to an 'all-models' file.
1608 This could be a trajectory such as DCD, an RMF, or a multimodel
1610 self.system.locations.append(location)
1612 ind = i + self._state_ensemble_offset
1613 self.system.ensembles[ind].file = location
1615 def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1621 protocol = self.all_protocols.get_last_protocol(state)
1622 num_models = protocol.steps[-1].num_models_end
1623 pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1624 pp.software = self.all_software.pmi
1625 self.all_protocols.add_postproc(pp, state)
1626 for i
in range(rex._number_of_clusters):
1627 group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1628 'cluster %d' % (i + 1)))
1629 state.add_model_group(group)
1631 e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1632 self.system.ensembles.append(e)
1634 for fname, stuple
in sorted(density_custom_ranges.items()):
1635 e.load_localization_density(state, fname, stuple,
1637 for stats
in e.load_all_models(self, state):
1638 m = self.add_model(group)
1641 m.name =
'Best scoring model'
1644 for c
in state.all_modeled_components:
1647 def _get_subassembly(self, comps, name, description):
1648 """Get an Assembly consisting of the given components.
1649 `compdict` is a dictionary of the components to add, where keys
1650 are the component names and values are the sequence ranges (or
1651 None to use all residues in the component)."""
1653 for comp, seqrng
in comps.items():
1654 a = self.asym_units[comp]
1655 asyms.append(a
if seqrng
is None else a(*seqrng))
1657 a = ihm.Assembly(asyms, name=name, description=description)
1660 def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1662 """Add a basic FoXS fit. This is largely intended for use from the
1664 assembly = self._get_subassembly(
1666 name=
"SAXS subassembly",
1667 description=
"All components that fit SAXS data")
1668 r = ihm.restraint.SASRestraint(
1669 dataset, assembly, segment=
False,
1670 fitting_method=
'FoXS', fitting_atom_type=
'Heavy atoms',
1671 multi_state=
False, radius_of_gyration=rg, details=details)
1672 r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1673 self.system.restraints.append(r)
1674 self._add_restraint_dataset(r)
1676 def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1677 image_resolution, projection_number,
1678 micrographs_number):
1679 r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1680 image_resolution, projection_number,
1682 self.system.restraints.append(r)
1683 self._add_restraint_dataset(r)
1685 def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1687 r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1688 self.system.restraints.append(r)
1689 self._add_restraint_dataset(r)
1691 def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1692 sigma, pmi_restraint):
1693 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1694 sigma, pmi_restraint, self._xy_plane)
1696 def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1697 sigma, pmi_restraint):
1698 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1699 sigma, pmi_restraint, self._xz_plane)
1701 def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1702 sigma, pmi_restraint):
1703 self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1704 sigma, pmi_restraint, self._z_axis)
1706 def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1707 sigma, pmi_restraint, geom):
1708 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1709 r = _GeometricRestraint(
1710 self, state, pmi_restraint, geom, asym.get_feature(ps),
1711 ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1714 self.system.restraints.append(r)
1715 self._add_restraint_dataset(r)
1717 def _get_membrane(self, tor_R, tor_r, tor_th):
1718 """Get an object representing a half-torus membrane"""
1719 if not hasattr(self,
'_seen_membranes'):
1720 self._seen_membranes = {}
1723 membrane_id = tuple(int(x * 100.)
for x
in (tor_R, tor_r, tor_th))
1724 if membrane_id
not in self._seen_membranes:
1725 m = ihm.geometry.HalfTorus(
1726 center=self._center_origin,
1727 transformation=self._identity_transform,
1728 major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1729 inner=
True, name=
'Membrane')
1730 self._seen_membranes[membrane_id] = m
1731 return self._seen_membranes[membrane_id]
1733 def add_membrane_surface_location_restraint(
1734 self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1735 self._add_membrane_restraint(
1736 state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1737 ihm.restraint.UpperBoundDistanceRestraint(0.))
1739 def add_membrane_exclusion_restraint(
1740 self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1741 self._add_membrane_restraint(
1742 state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1743 ihm.restraint.LowerBoundDistanceRestraint(0.))
1745 def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1746 sigma, pmi_restraint, rsr):
1747 asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1748 r = _GeometricRestraint(
1749 self, state, pmi_restraint,
1750 self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1752 self.system.restraints.append(r)
1753 self._add_restraint_dataset(r)
1755 def add_model(self, group, assembly=None, representation=None):
1756 state = self._last_state
1757 if representation
is None:
1758 representation = self.default_representation
1759 protocol = self.all_protocols.get_last_protocol(state)
1760 m = _Model(state.prot, self, protocol,
1761 assembly
if assembly
else state.modeled_assembly,
1768 """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1769 This returns a list of custom dumpers that can be passed as all or
1770 part of the `dumpers` argument to ihm.dumper.write(). They add
1771 PMI-specific information to mmCIF or BinaryCIF files written out
1773 return [_ReplicaExchangeProtocolDumper]
1777 """Get custom python-ihm handlers for reading PMI data from mmCIF.
1778 This returns a list of custom handlers that can be passed as all or
1779 part of the `handlers` argument to ihm.reader.read(). They read
1780 PMI-specific information from mmCIF or BinaryCIF files read in
1782 return [_ReplicaExchangeProtocolHandler]
1786 """Extract metadata from an EM density GMM file."""
1789 """Extract metadata from `filename`.
1790 @return a dict with key `dataset` pointing to the GMM file and
1791 `number_of_gaussians` to the number of GMMs (or None)"""
1792 loc = ihm.location.InputFileLocation(
1794 details=
"Electron microscopy density map, "
1795 "represented as a Gaussian Mixture Model (GMM)")
1798 loc._allow_duplicates =
True
1799 d = ihm.dataset.EMDensityDataset(loc)
1800 ret = {
'dataset': d,
'number_of_gaussians':
None}
1802 with open(filename)
as fh:
1804 if line.startswith(
'# data_fn: '):
1805 p = ihm.metadata.MRCParser()
1806 fn = line[11:].rstrip(
'\r\n')
1807 dataset = p.parse_file(os.path.join(
1808 os.path.dirname(filename), fn))[
'dataset']
1809 ret[
'dataset'].parents.append(dataset)
1810 elif line.startswith(
'# ncenters: '):
1811 ret[
'number_of_gaussians'] = int(line[12:])
1815 def _trim_unrep_termini(entity, asyms, representations):
1816 """Trim Entity sequence to only cover represented residues.
1818 PDB policy is for amino acid Entity sequences to be polymers (so
1819 they should include any loops or other gaps in the midst of the
1820 sequence) but for the termini to be trimmed of any not-modeled
1821 residues. Here, we modify the Entity sequence to remove any parts
1822 that are not included in any representation. This may change the
1823 numbering if any N-terminal residues are removed, and thus the offset
1824 between PMI and IHM numbering, as both count from 1."""
1826 for rep
in representations:
1828 if seg.asym_unit.entity
is entity:
1829 seg_range = seg.asym_unit.seq_id_range
1830 if rep_range
is None:
1831 rep_range = list(seg_range)
1833 rep_range[0] = min(rep_range[0], seg_range[0])
1834 rep_range[1] = max(rep_range[1], seg_range[1])
1836 if rep_range
is None or rep_range == [1, len(entity.sequence)]:
1842 pmi_offset = int(rep_range[0]) - 1
1843 entity.pmi_offset = pmi_offset
1845 if asym.entity
is entity:
1846 asym.auth_seq_id_map = entity.pmi_offset
1848 entity.sequence = entity.sequence[rep_range[0] - 1:rep_range[1]]
1853 for rep
in representations:
1855 if seg.asym_unit.entity
is entity:
1856 seg_range = seg.asym_unit.seq_id_range
1857 seg.asym_unit.seq_id_range = (seg_range[0] - pmi_offset,
1858 seg_range[1] - pmi_offset)
1859 if seg.starting_model:
1860 model = seg.starting_model
1861 seg_range = model.asym_unit.seq_id_range
1862 model.asym_unit.seq_id_range = \
1863 (seg_range[0] - pmi_offset,
1864 seg_range[1] - pmi_offset)
1865 model.offset = model.offset - pmi_offset
Select non water and non hydrogen atoms.
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
def create_representation
Create a new Representation and return it.
def create_transformed_component
Make a new component that's a transformed copy of another.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def exclude_coordinates
Don't record coordinates for the given domain.
A decorator to associate a particle with a part of a protein/DNA/RNA.
def add_uniprot_reference
Add UniProt accession (if available) to the IHM system.
def finalize_build
Called immediately after the PMI system is built.
Class to encode a modeling protocol as mmCIF.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def parse_file
Extract metadata from filename.
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Extract metadata from an EM density GMM file.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
A single asymmetric unit in the system.
A single entity in the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Classes to represent data structures used in mmCIF.
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Base class for capturing a modeling protocol.
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Class for easy writing of PDBs, RMFs, and stat files.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
A decorator for a residue.
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Mapping between FASTA one-letter codes and residue types.
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...