IMP logo
IMP Reference Guide  develop.50fdd7fa33,2025/08/31
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-IHM](https://pdb-ihm.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 import copy
13 import RMF
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
19 import IMP.pmi.tools
20 import IMP.pmi.alphabets
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import weakref
29 import ihm.location
30 import ihm.dataset
31 import ihm.dumper
32 import ihm.reader
33 import ihm.metadata
34 import ihm.startmodel
35 import ihm.model
36 import ihm.protocol
37 import ihm.analysis
38 import ihm.representation
39 import ihm.geometry
40 import ihm.cross_linkers
41 import ihm.reference
42 
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper:
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot,
69  multichar_chain=True, mmcif=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper:
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81  self._seen_ranges = {}
82 
83  def __getitem__(self, p):
84  protname = self._cm[p]
85  return self.simo.asym_units[protname]
86 
87  def get_feature(self, ps):
88  """Get an ihm.restraint.Feature that covers the given particles"""
89  # todo: handle things other than residues
90  rngs = []
91  for p in ps:
92  asym = self[p]
93  # todo: handle overlapping ranges
95  rind = IMP.atom.Residue(p).get_index()
96  rng = asym(rind, rind)
98  # PMI Fragments always contain contiguous residues
99  rinds = IMP.atom.Fragment(p).get_residue_indexes()
100  rng = asym(rinds[0], rinds[-1])
101  else:
102  raise ValueError("Unsupported particle type %s" % str(p))
103  # Join contiguous ranges
104  if len(rngs) > 0 and rngs[-1].asym == asym \
105  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
107  rng.seq_id_range[1])
108  else:
109  rngs.append(rng)
110  # If an identical feature already exists, return that
111  # todo: python-ihm should handle this automatically for us
112  hrngs = tuple(rngs)
113  if hrngs in self._seen_ranges:
114  return self._seen_ranges[hrngs]
115  else:
116  feat = ihm.restraint.ResidueFeature(rngs)
117  self._seen_ranges[hrngs] = feat
118  return feat
119 
120 
121 class _AllSoftware:
122  def __init__(self, system):
123  self.system = system
124  self.modeller_used = self.phyre2_used = False
125  self.pmi = ihm.Software(
126  name="IMP PMI module",
127  version=IMP.pmi.__version__,
128  classification="integrative model building",
129  description="integrative model building",
130  location='https://integrativemodeling.org')
131  self.imp = ihm.Software(
132  name="Integrative Modeling Platform (IMP)",
133  version=IMP.__version__,
134  classification="integrative model building",
135  description="integrative model building",
136  location='https://integrativemodeling.org')
137  # Only recent versions of python-ihm support adding citations for
138  # software
139  if hasattr(self.imp, 'citation'):
140  javi = 'Vel\u00e1zquez-Muriel J'
141  self.imp.citation = ihm.Citation(
142  pmid='22272186',
143  title='Putting the pieces together: integrative modeling '
144  'platform software for structure determination of '
145  'macromolecular assemblies',
146  journal='PLoS Biol', volume=10, page_range='e1001244',
147  year=2012,
148  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
149  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
150  doi='10.1371/journal.pbio.1001244')
151  self.pmi.citation = ihm.Citation(
152  pmid='31396911',
153  title='Modeling Biological Complexes Using Integrative '
154  'Modeling Platform.',
155  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
156  year=2019,
157  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
158  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
159  'Sali A'],
160  doi='10.1007/978-1-4939-9608-7_15')
161  self.system.software.extend([self.pmi, self.imp])
162 
163  def set_modeller_used(self, version, date):
164  if self.modeller_used:
165  return
166  self.modeller_used = True
167  s = ihm.Software(
168  name='MODELLER', classification='comparative modeling',
169  description='Comparative modeling by satisfaction '
170  'of spatial restraints, build ' + date,
171  location='https://salilab.org/modeller/', version=version)
172  self.system.software.append(s)
173  if hasattr(s, 'citation'):
174  s.citation = ihm.Citation(
175  pmid='8254673',
176  title='Comparative protein modelling by satisfaction of '
177  'spatial restraints.',
178  journal='J Mol Biol', volume=234, page_range=(779, 815),
179  year=1993, authors=['Sali A', 'Blundell TL'],
180  doi='10.1006/jmbi.1993.1626')
181 
182  def set_phyre2_used(self):
183  if self.phyre2_used:
184  return
185  self.phyre2_used = True
186  s = ihm.Software(
187  name='Phyre2', classification='protein homology modeling',
188  description='Protein Homology/analogY Recognition Engine V 2.0',
189  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
190  if hasattr(s, 'citation'):
191  s.citation = ihm.Citation(
192  pmid='25950237',
193  title='The Phyre2 web portal for protein modeling, '
194  'prediction and analysis.',
195  journal='Nat Protoc', volume=10, page_range=(845, 858),
196  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
197  'Sternberg MJ'],
198  year=2015, doi='10.1038/nprot.2015.053')
199  self.system.software.append(s)
200 
201 
202 def _get_fragment_is_rigid(fragment):
203  """Determine whether a fragment is modeled rigidly"""
204  leaves = IMP.atom.get_leaves(fragment.hier)
205  # Assume all leaves are set up as rigid/flexible in the same way
206  return IMP.core.RigidMember.get_is_setup(leaves[0])
207 
208 
209 class _PDBFragment(ihm.representation.ResidueSegment):
210  """Record details about part of a PDB file used as input
211  for a component."""
212  def __init__(self, state, component, start, end, pdb_offset,
213  pdbname, chain, hier, asym_unit):
214  # start, end are PMI residue indexes (not IHM)
215  super().__init__(
216  asym_unit=asym_unit.pmi_range(start, end),
217  rigid=None, primitive='sphere')
218  self.component, self.start, self.end, self.offset, self.pdbname \
219  = component, start, end, pdb_offset, pdbname
220  self.state, self.chain, self.hier = state, chain, hier
222  & IMP.atom.ChainPDBSelector([chain])
223  # Read file in mmCIF or BinaryCIF format if requested
224  if pdbname.endswith('.cif'):
225  read_file = IMP.atom.read_mmcif
226  elif pdbname.endswith('.bcif'):
227  read_file = IMP.atom.read_bcif
228  else:
229  read_file = IMP.atom.read_pdb
230  self.starting_hier = read_file(pdbname, state.model, sel)
231 
232  rigid = property(lambda self: _get_fragment_is_rigid(self),
233  lambda self, val: None)
234 
235  def combine(self, other):
236  pass
237 
238 
239 class _BeadsFragment(ihm.representation.FeatureSegment):
240  """Record details about beads used to represent part of a component."""
241  chain = None
242 
243  def __init__(self, state, component, start, end, count, hier, asym_unit):
244  super().__init__(
245  asym_unit=asym_unit(start, end), rigid=None, primitive='sphere',
246  count=count)
247  self.state, self.component, self.hier = state, component, hier
248 
249  rigid = property(lambda self: _get_fragment_is_rigid(self),
250  lambda self, val: None)
251 
252  def combine(self, other):
253  # todo: don't combine if one fragment is rigid and the other flexible
254  if (type(other) == type(self) and # noqa: E721
255  other.asym_unit.seq_id_range[0]
256  == self.asym_unit.seq_id_range[1] + 1):
257  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
258  other.asym_unit.seq_id_range[1])
259  self.count += other.count
260  return True
261 
262 
263 class _AllModelRepresentations:
264  def __init__(self, simo):
265  self.simo = simo
266  # dict of fragments, ordered by representation, then component name,
267  # then state
268  self.fragments = OrderedDict()
269  self._all_representations = {}
270 
271  def copy_component(self, state, name, original, asym_unit):
272  """Copy all representation for `original` in `state` to `name`"""
273  def copy_frag(f):
274  newf = copy.copy(f)
275  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
276  return newf
277  for rep in self.fragments:
278  if original in self.fragments[rep]:
279  if name not in self.fragments[rep]:
280  self.fragments[rep][name] = OrderedDict()
281  self.fragments[rep][name][state] = [
282  copy_frag(f) for f in self.fragments[rep][original][state]]
283  # Assume representation for a component is the same in all
284  # states, so only write out the first one
285  first_state = list(self.fragments[rep][name].keys())[0]
286  if state is first_state:
287  representation = self._all_representations[rep]
288  representation.extend(self.fragments[rep][name][state])
289 
290  def add_fragment(self, state, representation, fragment):
291  """Add a model fragment."""
292  comp = fragment.component
293  id_rep = id(representation)
294  self._all_representations[id_rep] = representation
295  if id_rep not in self.fragments:
296  self.fragments[id_rep] = OrderedDict()
297  if comp not in self.fragments[id_rep]:
298  self.fragments[id_rep][comp] = OrderedDict()
299  if state not in self.fragments[id_rep][comp]:
300  self.fragments[id_rep][comp][state] = []
301  fragments = self.fragments[id_rep][comp][state]
302  if len(fragments) == 0 or not fragments[-1].combine(fragment):
303  fragments.append(fragment)
304  # Assume representation for a component is the same in all states,
305  # so only write out the first one
306  first_state = list(self.fragments[id_rep][comp].keys())[0]
307  if state is first_state:
308  representation.append(fragment)
309 
310 
311 class _AllDatasets:
312  """Track all datasets generated by PMI and add them to the ihm.System"""
313  def __init__(self, system):
314  self.system = system
315  self._datasets = []
316  self._datasets_by_state = {}
317  self._restraints_by_state = {}
318 
319  def get_all_group(self, state):
320  """Get a DatasetGroup encompassing all datasets so far in this state"""
321  # Note that if a restraint dataset is replaced after this is called,
322  # the group will still contain the old dataset - mark dataset as read
323  # only?
324  g = ihm.dataset.DatasetGroup(
325  self._datasets_by_state.get(state, [])
326  + [r.dataset for r in self._restraints_by_state.get(state, [])
327  if r.dataset])
328  return g
329 
330  def add(self, state, dataset):
331  """Add a new dataset."""
332  self._datasets.append(dataset)
333  if state not in self._datasets_by_state:
334  self._datasets_by_state[state] = []
335  self._datasets_by_state[state].append(dataset)
336  # Make sure that the dataset ends up in the mmCIF file even if nothing
337  # refers to it
338  self.system.orphan_datasets.append(dataset)
339  return dataset
340 
341  def add_restraint(self, state, restraint):
342  """Add the dataset for a restraint"""
343  if state not in self._restraints_by_state:
344  self._restraints_by_state[state] = []
345  self._restraints_by_state[state].append(restraint)
346 
347 
348 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
349  """Restrain to a set of cross-links"""
350 
351  assembly = None
352  _label_map = {'wtDSS': 'DSS', 'scDSS': 'DSS', 'scEDC': 'EDC'}
353  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
354  'EDC': ihm.cross_linkers.edc}
355 
356  def __init__(self, pmi_restraint):
357  self.pmi_restraint = pmi_restraint
358  # Use ChemDescriptor from PMI restraint if available, otherwise guess
359  # it using the label
360  linker = getattr(self.pmi_restraint, 'linker', None)
361  label = self.pmi_restraint.label
362  self.label = label
363  super().__init__(
364  dataset=self.pmi_restraint.dataset,
365  linker=linker or self._get_chem_descriptor(label))
366 
367  @classmethod
368  def _get_chem_descriptor(cls, label):
369  # Map commonly-used subtypes to more standard labels
370  label = cls._label_map.get(label, label)
371  if label not in cls._descriptor_map:
372  # If label is not a standard linker type, make a new chemical
373  # descriptor containing just the name. We don't know the chemistry
374  # so cannot specify a SMILES or INCHI string for it at this point
375  d = ihm.ChemDescriptor(label)
376  cls._descriptor_map[label] = d
377  return cls._descriptor_map[label]
378 
379  def _set_psi_sigma(self, model):
380  old_values = []
381  # Do nothing if given a different state than the restraint applies to
382  if model.m != self.pmi_restraint.model:
383  return
384  for resolution in self.pmi_restraint.sigma_dictionary:
385  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
386  if model.stats and statname in model.stats:
387  sigma = float(model.stats[statname])
388  p = self.pmi_restraint.sigma_dictionary[resolution][0]
389  old_values.append((p, p.get_scale()))
390  p.set_scale(sigma)
391  for psiindex in self.pmi_restraint.psi_dictionary:
392  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
393  if model.stats and statname in model.stats:
394  psi = float(model.stats[statname])
395  p = self.pmi_restraint.psi_dictionary[psiindex][0]
396  old_values.append((p, p.get_scale()))
397  p.set_scale(psi)
398  # If the same particle is set multiple times, make sure we get the
399  # original value last
400  return list(reversed(old_values))
401 
402  def add_fits_from_model_statfile(self, model):
403  # Set psi/sigma for all particles
404  old_values = self._set_psi_sigma(model)
405  # If no stats were collected, we can't show the fit
406  if not old_values:
407  return
408  for xl in self.cross_links:
409  # Get current psi/sigma particle value for each XL
410  xl.fits[model] = ihm.restraint.CrossLinkFit(
411  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
412  # Restore original psi/sigma
413  for p, scale in old_values:
414  p.set_scale(scale)
415 
416  # Have our dataset point to that in the original PMI restraint
417  def __set_dataset(self, val):
418  self.pmi_restraint.dataset = val
419  dataset = property(lambda self: self.pmi_restraint.dataset,
420  __set_dataset)
421 
422 
423 def get_asym_mapper_for_state(simo, state, asym_map):
424  asym = asym_map.get(state, None)
425  if asym is None:
426  asym = _AsymMapper(simo, state.prot)
427  asym_map[state] = asym
428  return asym
429 
430 
431 class _PMICrossLink:
432  # Query PMI particles to get psi and sigma values at time of use, not
433  # time of construction (since they may be set after creating the restraint)
434  psi = property(lambda self: self.psi_p.get_scale(),
435  lambda self, val: None)
436  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
437  lambda self, val: None)
438  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
439  lambda self, val: None)
440 
441 
442 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
443  pass
444 
445 
446 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
447  pass
448 
449 
450 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
451  def __init__(self, state, pmi_restraint, image_number, resolution,
452  pixel_size, image_resolution, projection_number,
453  micrographs_number):
454  self.pmi_restraint, self.image_number = pmi_restraint, image_number
455  super().__init__(
456  dataset=pmi_restraint.datasets[image_number],
457  assembly=state.modeled_assembly,
458  segment=False, number_raw_micrographs=micrographs_number,
459  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
460  image_resolution=image_resolution,
461  number_of_projections=projection_number)
462 
463  # Have our dataset point to that in the original PMI restraint
464  def __get_dataset(self):
465  return self.pmi_restraint.datasets[self.image_number]
466 
467  def __set_dataset(self, val):
468  self.pmi_restraint.datasets[self.image_number] = val
469 
470  dataset = property(__get_dataset, __set_dataset)
471 
472  def add_fits_from_model_statfile(self, model):
473  ccc = self._get_cross_correlation(model)
474  transform = self._get_transformation(model)
475  rot = transform.get_rotation()
476  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
477  self.fits[model] = ihm.restraint.EM2DRestraintFit(
478  cross_correlation_coefficient=ccc,
479  rot_matrix=rm,
480  tr_vector=transform.get_translation())
481 
482  def _get_transformation(self, model):
483  """Get the transformation that places the model on the image"""
484  stats = model.em2d_stats or model.stats
485  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
486  self.image_number + 1)
487  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
488  t = [float(stats[prefix + '_Translation%d' % i])
489  for i in range(3)]
490  # If the model coordinates are transformed, need to back transform
491  # them first
492  inv = model.transform.get_inverse()
494  IMP.algebra.Vector3D(*t)) * inv
495 
496  def _get_cross_correlation(self, model):
497  """Get the cross correlation coefficient between the model projection
498  and the image"""
499  stats = model.em2d_stats or model.stats
500  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
501  % (self.pmi_restraint.label,
502  self.image_number + 1)])
503 
504 
505 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
506 
507  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
508  self.pmi_restraint = pmi_restraint
509  super().__init__(
510  dataset=pmi_restraint.dataset,
511  assembly=self._get_assembly(densities, simo, state),
512  fitting_method='Gaussian mixture models',
513  number_of_gaussians=len(target_ps))
514 
515  # Have our dataset point to that in the original PMI restraint
516  def __set_dataset(self, val):
517  self.pmi_restraint.dataset = val
518  dataset = property(lambda self: self.pmi_restraint.dataset,
519  __set_dataset)
520 
521  def _get_assembly(self, densities, simo, state):
522  """Get the Assembly that this restraint acts on"""
523  cm = _ComponentMapper(state.prot)
524  components = {}
525  for d in densities:
526  components[cm[d]] = None # None == all residues in this component
527  a = simo._get_subassembly(
528  components, name="EM subassembly",
529  description="All components that fit the EM map")
530  return a
531 
532  def add_fits_from_model_statfile(self, model):
533  ccc = self._get_cross_correlation(model)
534  self.fits[model] = ihm.restraint.EM3DRestraintFit(
535  cross_correlation_coefficient=ccc)
536 
537  def _get_cross_correlation(self, model):
538  """Get the cross correlation coefficient between the model
539  and the map"""
540  if model.stats is not None:
541  return float(model.stats['GaussianEMRestraint_%s_CCC'
542  % self.pmi_restraint.label])
543 
544 
545 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
546 
547  def __init__(self, simo, state, pmi_restraint, geometric_object,
548  feature, distance, sigma):
549  self.pmi_restraint = pmi_restraint
550  super().__init__(
551  dataset=pmi_restraint.dataset,
552  geometric_object=geometric_object, feature=feature,
553  distance=distance, harmonic_force_constant=1. / sigma,
554  restrain_all=True)
555 
556  # Have our dataset point to that in the original PMI restraint
557  def __set_dataset(self, val):
558  self.pmi_restraint.dataset = val
559  dataset = property(lambda self: self.pmi_restraint.dataset,
560  __set_dataset)
561 
562 
563 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
564  def __init__(self, state, rex):
565  if rex.monte_carlo_sample_objects is not None:
566  method = 'Replica exchange monte carlo'
567  else:
568  method = 'Replica exchange molecular dynamics'
569  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
570  self.replica_exchange_minimum_temperature = \
571  rex.vars['replica_exchange_minimum_temperature']
572  self.replica_exchange_maximum_temperature = \
573  rex.vars['replica_exchange_maximum_temperature']
574  super().__init__(
575  assembly=state.modeled_assembly,
576  dataset_group=None, # filled in by add_step()
577  method=method, name='Sampling',
578  num_models_begin=None, # filled in by add_step()
579  num_models_end=rex.vars["number_of_frames"],
580  multi_scale=True, multi_state=False, ordered=False, ensemble=True)
581 
582 
583 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
584  """Write IMP-specific information about replica exchange to mmCIF.
585  Note that IDs will have already been assigned by python-ihm's
586  standard modeling protocol dumper."""
587  def dump(self, system, writer):
588  with writer.loop("_imp_replica_exchange_protocol",
589  ["protocol_id", "step_id", "monte_carlo_temperature",
590  "replica_exchange_minimum_temperature",
591  "replica_exchange_maximum_temperature"]) as lp:
592  for p in system._all_protocols():
593  for s in p.steps:
594  if isinstance(s, _ReplicaExchangeProtocolStep):
595  self._dump_step(p, s, lp)
596 
597  def _dump_step(self, p, s, lp):
598  mintemp = s.replica_exchange_minimum_temperature
599  maxtemp = s.replica_exchange_maximum_temperature
600  lp.write(protocol_id=p._id, step_id=s._id,
601  monte_carlo_temperature=s.monte_carlo_temperature,
602  replica_exchange_minimum_temperature=mintemp,
603  replica_exchange_maximum_temperature=maxtemp)
604 
605 
606 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
607  category = '_imp_replica_exchange_protocol'
608 
609  """Read IMP-specific information about replica exchange from mmCIF."""
610  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
611  replica_exchange_minimum_temperature,
612  replica_exchange_maximum_temperature):
613  p = self.sysr.protocols.get_by_id(protocol_id)
614  # todo: match step_id properly (don't assume contiguous)
615  s = p.steps[int(step_id)-1]
616  # Upgrade from plain ihm Step to IMP subclass
617  s.__class__ = _ReplicaExchangeProtocolStep
618  s.monte_carlo_temperature = \
619  self.get_float(monte_carlo_temperature)
620  s.replica_exchange_minimum_temperature = \
621  self.get_float(replica_exchange_minimum_temperature)
622  s.replica_exchange_maximum_temperature = \
623  self.get_float(replica_exchange_maximum_temperature)
624 
625 
626 class _SimpleProtocolStep(ihm.protocol.Step):
627  def __init__(self, state, num_models_end, method):
628  super().__init__(
629  assembly=state.modeled_assembly,
630  dataset_group=None, # filled in by add_step()
631  method=method, name='Sampling',
632  num_models_begin=None, # filled in by add_step()
633  num_models_end=num_models_end,
634  multi_scale=True, multi_state=False, ordered=False,
635  ensemble=True)
636 
637 
638 class _Chain:
639  """Represent a single chain in a Model"""
640  def __init__(self, pmi_chain_id, asym_unit):
641  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
642  self.spheres = []
643  self.atoms = []
644 
645  def add(self, xyz, atom_type, residue_type, residue_index,
646  all_indexes, radius):
647  if atom_type is None:
648  self.spheres.append((xyz, residue_type, residue_index,
649  all_indexes, radius))
650  else:
651  self.atoms.append((xyz, atom_type, residue_type, residue_index,
652  all_indexes, radius))
653  orig_comp = property(lambda self: self.comp)
654 
655 
656 class _TransformedChain:
657  """Represent a chain that is a transformed version of another"""
658  def __init__(self, orig_chain, asym_unit, transform):
659  self.orig_chain, self.asym_unit = orig_chain, asym_unit
660  self.transform = transform
661 
662  def __get_spheres(self):
663  for (xyz, residue_type, residue_index, all_indexes,
664  radius) in self.orig_chain.spheres:
665  yield (self.transform * xyz, residue_type, residue_index,
666  all_indexes, radius)
667  spheres = property(__get_spheres)
668 
669  def __get_atoms(self):
670  for (xyz, atom_type, residue_type, residue_index, all_indexes,
671  radius) in self.orig_chain.atoms:
672  yield (self.transform * xyz, atom_type, residue_type,
673  residue_index, all_indexes, radius)
674  atoms = property(__get_atoms)
675 
676  entity = property(lambda self: self.orig_chain.entity)
677  orig_comp = property(lambda self: self.orig_chain.comp)
678 
679 
680 class _Excluder:
681  def __init__(self, component, simo):
682  self._seqranges = simo._exclude_coords.get(component, [])
683 
684  def is_excluded(self, indexes):
685  """Return True iff the given sequence range is excluded."""
686  for seqrange in self._seqranges:
687  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
688  return True
689 
690 
691 class _Model(ihm.model.Model):
692  def __init__(self, prot, simo, protocol, assembly, representation):
693  super().__init__(assembly=assembly, protocol=protocol,
694  representation=representation)
695  self.simo = weakref.proxy(simo)
696  # Transformation from IMP coordinates into mmCIF coordinate system.
697  # Normally we pass through coordinates unchanged, but in some cases
698  # we may want to translate them (e.g. Nup84, where the deposited PDB
699  # files are all centered; we want the mmCIF files to match)
701  self.em2d_stats = None
702  self.stats = None
703  # True iff restraints act on this model
704  self._is_restrained = True
705  o = self.output = IMP.pmi.output.Output(atomistic=True)
706  name = 'cif-output'
707  self.m = prot.get_model()
708  o.dictionary_pdbs[name] = prot
709  o._init_dictchain(name, prot, multichar_chain=True)
710  (particle_infos_for_pdb,
711  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
712  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
713  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
714  self.rmsf = {}
715 
716  def all_chains(self, simo):
717  """Yield all chains, including transformed ones"""
718  chain_for_comp = {}
719  for c in self.chains:
720  yield c
721  chain_for_comp[c.comp] = c
722  for tc in simo._transformed_components:
723  orig_chain = chain_for_comp.get(tc.original, None)
724  if orig_chain:
725  asym = simo.asym_units[tc.name]
726  c = _TransformedChain(orig_chain, asym, tc.transform)
727  c.comp = tc.name
728  yield c
729 
730  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
731  entity_for_chain = {}
732  comp_for_chain = {}
733  correct_asym = {}
734  for protname, chain_id in o.dictchain[name].items():
735  if protname in simo.entities:
736  entity_for_chain[chain_id] = simo.entities[protname]
737  else:
738  # Remove copy number
739  pn = protname.split('.')[0]
740  entity_for_chain[chain_id] = simo.entities[pn]
741  comp_for_chain[chain_id] = protname
742  # When doing multi-state modeling, the chain ID returned here
743  # (assigned sequentially) might not be correct (states may have
744  # gaps in the chain IDs). Map it to the correct asym unit.
745  correct_asym[chain_id] = simo.asym_units[protname]
746 
747  # Gather by chain ID (should be sorted by chain ID already)
748  self.chains = []
749  chain = None
750  excluder = None
751 
752  for (xyz, atom_type, residue_type, chain_id, residue_index,
753  all_indexes, radius) in particle_infos_for_pdb:
754  if chain is None or chain.pmi_chain_id != chain_id:
755  chain = _Chain(chain_id, correct_asym[chain_id])
756  chain.entity = entity_for_chain[chain_id]
757  chain.comp = comp_for_chain[chain_id]
758  self.chains.append(chain)
759  excluder = _Excluder(chain.comp, simo)
760  if not excluder.is_excluded(all_indexes if all_indexes
761  else [residue_index]):
762  chain.add(xyz, atom_type, residue_type, residue_index,
763  all_indexes, radius)
764 
765  def parse_rmsf_file(self, fname, component):
766  self.rmsf[component] = rmsf = {}
767  with open(str(fname)) as fh:
768  for line in fh:
769  resnum, blocknum, val = line.split()
770  rmsf[int(resnum)] = (int(blocknum), float(val))
771 
772  def get_rmsf(self, component, indexes):
773  """Get the RMSF value for the given residue indexes."""
774  if not self.rmsf:
775  return None
776  rmsf = self.rmsf[component]
777  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
778  if len(blocknums) != 1:
779  raise ValueError("Residue indexes %s aren't all in the same block"
780  % str(indexes))
781  return rmsf[indexes[0]][1]
782 
783  def get_atoms(self):
784  for chain in self.all_chains(self.simo):
785  pmi_offset = chain.asym_unit.entity.pmi_offset
786  for atom in chain.atoms:
787  (xyz, atom_type, residue_type, residue_index,
788  all_indexes, radius) = atom
789  pt = self.transform * xyz
790  yield ihm.model.Atom(
791  asym_unit=chain.asym_unit,
792  seq_id=residue_index - pmi_offset,
793  atom_id=atom_type.get_string(),
794  type_symbol=None, # todo: get element
795  x=pt[0], y=pt[1], z=pt[2])
796 
797  def get_spheres(self):
798  for chain in self.all_chains(self.simo):
799  pmi_offset = chain.asym_unit.entity.pmi_offset
800  for sphere in chain.spheres:
801  (xyz, residue_type, residue_index,
802  all_indexes, radius) = sphere
803  if all_indexes is None:
804  all_indexes = (residue_index,)
805  pt = self.transform * xyz
806  yield ihm.model.Sphere(
807  asym_unit=chain.asym_unit,
808  seq_id_range=(all_indexes[0] - pmi_offset,
809  all_indexes[-1] - pmi_offset),
810  x=pt[0], y=pt[1], z=pt[2], radius=radius,
811  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
812 
813 
814 class _AllProtocols:
815  def __init__(self, simo):
816  self.simo = weakref.proxy(simo)
817  # Lists of Protocols by state
818  self.protocols = OrderedDict()
819 
820  def add_protocol(self, state):
821  """Add a new Protocol"""
822  if state not in self.protocols:
823  self.protocols[state] = []
824  p = ihm.protocol.Protocol()
825  self.simo.system.orphan_protocols.append(p)
826  self.protocols[state].append(p)
827 
828  def add_step(self, step, state):
829  """Add a ProtocolStep to the last Protocol of the given State"""
830  if state not in self.protocols:
831  self.add_protocol(state)
832  protocol = self.get_last_protocol(state)
833  if len(protocol.steps) == 0:
834  step.num_models_begin = 0
835  else:
836  step.num_models_begin = protocol.steps[-1].num_models_end
837  protocol.steps.append(step)
838  step.id = len(protocol.steps)
839  # Assume that protocol uses all currently-defined datasets
840  step.dataset_group = self.simo.all_datasets.get_all_group(state)
841 
842  def add_postproc(self, step, state):
843  """Add a postprocessing step to the last protocol"""
844  protocol = self.get_last_protocol(state)
845  if len(protocol.analyses) == 0:
846  protocol.analyses.append(ihm.analysis.Analysis())
847  protocol.analyses[-1].steps.append(step)
848 
849  def get_last_protocol(self, state):
850  """Return the most recently-added _Protocol"""
851  return self.protocols[state][-1]
852 
853 
854 class _AllStartingModels:
855  def __init__(self, simo):
856  self.simo = simo
857  # dict of starting models (entire PDB files), collected from fragments,
858  # ordered by component name and state
859  self.models = OrderedDict()
860  self.output = IMP.pmi.output.Output()
861 
862  def add_pdb_fragment(self, fragment):
863  """Add a starting model PDB fragment."""
864  comp = fragment.component
865  state = fragment.state
866  if comp not in self.models:
867  self.models[comp] = OrderedDict()
868  if state not in self.models[comp]:
869  self.models[comp][state] = []
870  models = self.models[comp][state]
871  if len(models) == 0 \
872  or models[-1].fragments[0].pdbname != fragment.pdbname:
873  model = self._add_model(fragment)
874  models.append(model)
875  else:
876  # Break circular ref between fragment and model
877  models[-1].fragments.append(weakref.proxy(fragment))
878  # Update residue range to cover all fragments
879  # We don't use fragment.offset here because fragment numbering
880  # is already in seq_id, not PDB numbering
881  pmi_offset = models[-1].asym_unit.entity.pmi_offset
882  sid_begin = min(fragment.start - pmi_offset,
883  models[-1].asym_unit.seq_id_range[0])
884  sid_end = max(fragment.end - pmi_offset,
885  models[-1].asym_unit.seq_id_range[1])
886  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
887  fragment.starting_model = models[-1]
888 
889  def _add_model(self, f):
890  if (hasattr(ihm.metadata, 'CIFParser')
891  and f.pdbname.endswith('.cif')):
892  parser = ihm.metadata.CIFParser()
893  elif (hasattr(ihm.metadata, 'BinaryCIFParser')
894  and f.pdbname.endswith('.bcif')):
895  parser = ihm.metadata.BinaryCIFParser()
896  else:
897  parser = ihm.metadata.PDBParser()
898  r = parser.parse_file(f.pdbname)
899 
900  self.simo._add_dataset(r['dataset'])
901  # We only want the templates that model the starting model chain
902  templates = r['templates'].get(f.chain, [])
903  for t in templates:
904  if t.alignment_file:
905  self.simo.system.locations.append(t.alignment_file)
906  if t.dataset:
907  self.simo._add_dataset(t.dataset)
908  source = r.get('entity_source', {}).get(f.chain)
909  if source:
910  f.asym_unit.entity.source = source
911  pmi_offset = f.asym_unit.entity.pmi_offset
912  m = _StartingModel(
913  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
914  dataset=r['dataset'], asym_id=f.chain,
915  templates=templates, offset=f.offset + pmi_offset,
916  metadata=r.get('metadata'),
917  software=r['software'][0] if r['software'] else None,
918  script_file=r['script'])
919  m.fragments = [weakref.proxy(f)]
920  return m
921 
922 
923 class _StartingModel(ihm.startmodel.StartingModel):
924  def get_seq_dif(self):
925  return self._seq_dif
926 
927  def get_atoms(self):
928  pmi_offset = self.asym_unit.entity.pmi_offset
929  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
930  self.asym_unit)
931  for f in self.fragments:
932  sel = IMP.atom.Selection(
933  f.starting_hier,
934  residue_indexes=list(range(f.start - f.offset,
935  f.end - f.offset + 1)))
936  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
937  f.offset - pmi_offset):
938  yield a
939  self._seq_dif = mh._seq_dif
940 
941 
942 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
943  """Post processing using AnalysisReplicaExchange0 macro"""
944 
945  def __init__(self, rex, num_models_begin):
946  self.rex = rex
947  num_models_end = 0
948  for fname in self.get_all_stat_files():
949  with open(str(fname)) as fh:
950  num_models_end += len(fh.readlines())
951  super().__init__(
952  feature='RMSD', num_models_begin=num_models_begin,
953  num_models_end=num_models_end)
954 
955  def get_stat_file(self, cluster_num):
956  return self.rex._outputdir / ("cluster.%d" % cluster_num) / 'stat.out'
957 
958  def get_all_stat_files(self):
959  for i in range(self.rex._number_of_clusters):
960  yield self.get_stat_file(i)
961 
962 
963 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
964  """Ensemble generated using AnalysisReplicaExchange0 macro"""
965 
966  num_models_deposited = None # Override base class property
967 
968  def __init__(self, pp, cluster_num, model_group, num_deposit):
969  with open(str(pp.get_stat_file(cluster_num))) as fh:
970  num_models = len(fh.readlines())
971  super().__init__(
972  num_models=num_models,
973  model_group=model_group, post_process=pp,
974  clustering_feature=pp.feature,
975  name=model_group.name)
976  self.cluster_num = cluster_num
977  self.num_models_deposited = num_deposit
978 
979  def get_rmsf_file(self, component):
980  return (self.post_process.rex._outputdir
981  / ('cluster.%d' % self.cluster_num)
982  / ('rmsf.%s.dat' % component))
983 
984  def load_rmsf(self, model, component):
985  fname = self.get_rmsf_file(component)
986  if fname.exists():
987  model.parse_rmsf_file(fname, component)
988 
989  def get_localization_density_file(self, fname):
990  return (self.post_process.rex._outputdir
991  / ('cluster.%d' % self.cluster_num)
992  / ('%s.mrc' % fname))
993 
994  def load_localization_density(self, state, fname, select_tuple,
995  asym_units):
996  fullpath = self.get_localization_density_file(fname)
997  if fullpath.exists():
998  details = "Localization density for %s %s" \
999  % (fname, self.model_group.name)
1000  local_file = ihm.location.OutputFileLocation(str(fullpath),
1001  details=details)
1002  for s in select_tuple:
1003  if isinstance(s, tuple) and len(s) == 3:
1004  asym = asym_units[s[2]].pmi_range(s[0], s[1])
1005  else:
1006  asym = asym_units[s]
1007  den = ihm.model.LocalizationDensity(file=local_file,
1008  asym_unit=asym)
1009  self.densities.append(den)
1010 
1011  def load_all_models(self, simo, state):
1012  stat_fname = self.post_process.get_stat_file(self.cluster_num)
1013  model_num = 0
1014  with open(str(stat_fname)) as fh:
1015  stats = ast.literal_eval(fh.readline())
1016  # Correct path
1017  rmf_file = stat_fname.parent / ("%d.rmf3" % model_num)
1018  # todo: test with real PMI2 systems
1019  if rmf_file.exists():
1020  rh = RMF.open_rmf_file_read_only(str(rmf_file))
1021  system = state._pmi_object.system
1022  IMP.rmf.link_hierarchies(rh, [system.hier])
1023  IMP.rmf.load_frame(fh, RMF.FrameID(0))
1024  # todo: fill in other data from stat file, e.g. cross-link phi/psi
1025  yield stats
1026  model_num += 1
1027  if model_num >= self.num_models_deposited:
1028  return
1029 
1030  # todo: also support dRMS precision
1031  def _get_precision(self):
1032  precfile = (self.post_process.rex._outputdir /
1033  ("precision.%d.%d.out" % (self.cluster_num,
1034  self.cluster_num)))
1035  if not precfile.exists():
1036  return ihm.unknown
1037  # Fail if the precision.x.x.out file doesn't match the cluster
1038  r = re.compile(
1039  r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
1040  % self.cluster_num)
1041  with open(str(precfile)) as fh:
1042  for line in fh:
1043  m = r.match(line)
1044  if m:
1045  return float(m.group(1))
1046 
1047  precision = property(lambda self: self._get_precision(),
1048  lambda self, val: None)
1049 
1050 
1051 class _SimpleEnsemble(ihm.model.Ensemble):
1052  """Simple manually-created ensemble"""
1053 
1054  num_models_deposited = None # Override base class property
1055 
1056  def __init__(self, pp, model_group, num_models, drmsd,
1057  num_models_deposited, ensemble_file):
1058  super().__init__(
1059  model_group=model_group, post_process=pp, num_models=num_models,
1060  file=ensemble_file, precision=drmsd, name=model_group.name,
1061  clustering_feature='dRMSD')
1062  self.num_models_deposited = num_models_deposited
1063 
1064  def load_localization_density(self, state, component, asym, local_file):
1065  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1066  self.densities.append(den)
1067 
1068 
1069 class _CustomDNAAlphabet:
1070  """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1071  as in python-ihm)"""
1072  _comps = dict([cc.code_canonical, cc]
1073  for cc in ihm.DNAAlphabet._comps.values())
1074 
1075 
1076 class _EntityMapper(dict):
1077  """Handle mapping from IMP components (without copy number) to CIF
1078  entities. Multiple components may map to the same entity if they
1079  share sequence."""
1080  def __init__(self, system):
1081  super().__init__()
1082  self._sequence_dict = {}
1083  self._entities = []
1084  self.system = system
1085 
1086  def _get_alphabet(self, alphabet):
1087  """Map a PMI alphabet to an IHM alphabet"""
1088  # todo: we should probably ignore alphabets entirely and use
1089  # residue types directly (e.g. we shouldn't compare one-letter
1090  # code sequence to determine if an Entity is unique)
1091  alphabet_map = {None: ihm.LPeptideAlphabet,
1092  IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1093  IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1094  IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1095  if alphabet in alphabet_map:
1096  return alphabet_map[alphabet]
1097  else:
1098  raise TypeError("Don't know how to handle %s" % alphabet)
1099 
1100  def add(self, component_name, sequence, offset, alphabet, uniprot):
1101  def entity_seq(sequence):
1102  # Map X to UNK
1103  if 'X' in sequence:
1104  return ['UNK' if s == 'X' else s for s in sequence]
1105  else:
1106  return sequence
1107  if sequence not in self._sequence_dict:
1108  # Use the name of the first component, stripped of any copy number,
1109  # as the description of the entity
1110  d = component_name.split("@")[0].split(".")[0]
1111  entity = Entity(entity_seq(sequence), description=d,
1112  pmi_offset=offset,
1113  alphabet=self._get_alphabet(alphabet),
1114  uniprot=uniprot)
1115  self.system.entities.append(entity)
1116  self._sequence_dict[sequence] = entity
1117  self[component_name] = self._sequence_dict[sequence]
1118 
1119 
1120 class _TransformedComponent:
1121  def __init__(self, name, original, transform):
1122  self.name, self.original, self.transform = name, original, transform
1123 
1124 
1125 class _SimpleRef:
1126  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1127  def __init__(self, ref):
1128  self.ref = ref
1129 
1130  def __call__(self):
1131  return self.ref
1132 
1133 
1134 class _State(ihm.model.State):
1135  """Representation of a single state in the system."""
1136 
1137  def __init__(self, pmi_object, po):
1138  # Point to the PMI object for this state. Use a weak reference
1139  # since the state object typically points to us too, so we need
1140  # to break the reference cycle. In PMI1 this will be a
1141  # Representation object; in PMI2 it is the PMI2 State object itself.
1142  self._pmi_object = weakref.proxy(pmi_object)
1143  if hasattr(pmi_object, 'state'):
1144  # Need a strong ref to pmi_object.state to prevent it from being
1145  # cleaned up when doing PMI1 multistate modeling
1146  self._pmi_state = _SimpleRef(pmi_object.state)
1147  else:
1148  self._pmi_state = weakref.ref(pmi_object)
1149  # Preserve PMI state name
1150  old_name = self.name
1151  super().__init__(experiment_type='Fraction of bulk')
1152  self.name = old_name
1153 
1154  # The assembly of all components modeled by IMP in this state.
1155  # This may be smaller than the complete assembly.
1156  self.modeled_assembly = ihm.Assembly(
1157  name="Modeled assembly",
1158  description=self.get_postfixed_name(
1159  "All components modeled by IMP"))
1160  po.system.orphan_assemblies.append(self.modeled_assembly)
1161 
1162  self.all_modeled_components = []
1163 
1164  def __hash__(self):
1165  return hash(self._pmi_state())
1166 
1167  def __eq__(self, other):
1168  return self._pmi_state() == other._pmi_state()
1169 
1170  def add_model_group(self, group):
1171  self.append(group)
1172  return group
1173 
1174  def get_prefixed_name(self, name):
1175  """Prefix the given name with the state name, if available."""
1176  if self.short_name:
1177  return self.short_name + ' ' + name
1178  else:
1179  # Can't use capitalize() since that un-capitalizes everything
1180  # but the first letter
1181  return name[0].upper() + name[1:] if name else ''
1182 
1183  def get_postfixed_name(self, name):
1184  """Postfix the given name with the state name, if available."""
1185  if self.short_name:
1186  return "%s in state %s" % (name, self.short_name)
1187  else:
1188  return name
1189 
1190  short_name = property(lambda self: self._pmi_state().short_name)
1191  long_name = property(lambda self: self._pmi_state().long_name)
1192 
1193  def __get_name(self):
1194  return self._pmi_state().long_name
1195 
1196  def __set_name(self, val):
1197  self._pmi_state().long_name = val
1198 
1199  name = property(__get_name, __set_name)
1200 
1201 
1202 class Entity(ihm.Entity):
1203  """A single entity in the system. This contains information (such as
1204  database identifiers) specific to a particular sequence rather than
1205  a copy (for example, when modeling a homodimer, two AsymUnits will
1206  point to the same Entity).
1207 
1208  This functions identically to the base ihm.Entity class, but it
1209  allows identifying residues by either the PMI numbering scheme
1210  (which is always contiguous starting at 1, covering the entire
1211  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1212  starts at 1, but which only covers the modeled subset of the full
1213  sequence, with non-modeled N-terminal or C-terminal residues
1214  removed). The actual offset (which is the integer to be added to the
1215  IHM numbering to get PMI numbering, or equivalently the number of
1216  not-represented N-terminal residues in the PMI sequence) is
1217  available in the `pmi_offset` member.
1218 
1219  If a UniProt accession was provided for the sequence (either when
1220  State.create_molecule() was called, or in the FASTA alignment file
1221  header) then that is available in the `uniprot` member, and can be
1222  added to the IHM system with the add_uniprot_reference method.
1223  """
1224  def __init__(self, sequence, pmi_offset, uniprot, *args, **keys):
1225  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1226  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1227  self.pmi_offset = pmi_offset
1228  self.uniprot = uniprot
1229  super().__init__(sequence, *args, **keys)
1230 
1231  def pmi_residue(self, res_id):
1232  """Return a single IHM residue indexed using PMI numbering"""
1233  return self.residue(res_id - self.pmi_offset)
1234 
1235  def pmi_range(self, res_id_begin, res_id_end):
1236  """Return a range of IHM residues indexed using PMI numbering"""
1237  off = self.pmi_offset
1238  return self(res_id_begin - off, res_id_end - off)
1239 
1240  def add_uniprot_reference(self):
1241  """Add UniProt accession (if available) to the IHM system.
1242  If a UniProt accession was provided for the sequence (either when
1243  State.create_molecule() was called, or in the FASTA alignment file
1244  header), then look this up at the UniProt web site (requires
1245  network access) to get full information, and add it to the IHM
1246  system. The resulting reference object is returned. If the IMP
1247  and UniProt sequences are not identical, then this object may
1248  need to be modified by specifying an alignment and/or
1249  single-point mutations.
1250  """
1251  if self.uniprot:
1252  print('Adding UniProt accession %s reference for entity %s'
1253  % (self.uniprot, self.description))
1254  ref = ihm.reference.UniProtSequence.from_accession(self.uniprot)
1255  self.references.append(ref)
1256  return ref
1257 
1258 
1259 class AsymUnit(ihm.AsymUnit):
1260  """A single asymmetric unit in the system. This roughly corresponds to
1261  a single PMI subunit (sequence, copy, or clone).
1262 
1263  This functions identically to the base ihm.AsymUnit class, but it
1264  allows identifying residues by either the PMI numbering scheme
1265  (which is always contiguous starting at 1, covering the entire
1266  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1267  starts at 1, but which only covers the modeled subset of the full
1268  sequence, with non-modeled N-terminal or C-terminal residues
1269  removed).
1270 
1271  The `entity` member of this class points to an Entity object, which
1272  contains information (such as database identifiers) specific to
1273  a particular sequence rather than a copy (for example, when modeling
1274  a homodimer, two AsymUnits will point to the same Entity).
1275  """
1276 
1277  def __init__(self, entity, *args, **keys):
1278  super().__init__(
1279  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1280 
1281  def pmi_residue(self, res_id):
1282  """Return a single IHM residue indexed using PMI numbering"""
1283  return self.residue(res_id - self.entity.pmi_offset)
1284 
1285  def pmi_range(self, res_id_begin, res_id_end):
1286  """Return a range of IHM residues indexed using PMI numbering"""
1287  off = self.entity.pmi_offset
1288  return self(res_id_begin - off, res_id_end - off)
1289 
1290 
1292  """Class to encode a modeling protocol as mmCIF.
1293 
1294  IMP has basic support for writing out files in mmCIF format, for
1295  deposition in [PDB-IHM](https://pdb-ihm.org/).
1296  After creating an instance of this class, attach it to an
1297  IMP.pmi.topology.System object. After this, any
1298  generated models and metadata are automatically collected in the
1299  `system` attribute, which is an
1300  [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1301  Once the protocol is complete, call finalize() to make sure `system`
1302  contains everything, then use the
1303  [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1304  to write out files in mmCIF or BinaryCIF format.
1305 
1306  Each PMI subunit will be mapped to an IHM AsymUnit class which contains the
1307  subset of the sequence that was represented. Use the `asym_units` dict to
1308  get this object given a PMI subunit name. Each unique sequence will be
1309  mapped to an IHM Entity class (for example when modeling a homodimer
1310  there will be two AsymUnits which both point to the same Entity). Use
1311  the `entities` dict to get this object from a PMI subunit name.
1312 
1313  See also Entity, AsymUnit, get_handlers(), get_dumpers().
1314  """ # noqa: E501
1315  def __init__(self):
1316  # Ultimately, collect data in an ihm.System object
1317  self.system = ihm.System()
1318  self._state_group = ihm.model.StateGroup()
1319  self.system.state_groups.append(self._state_group)
1320 
1321  self._state_ensemble_offset = 0
1322  self._main_script = os.path.abspath(sys.argv[0])
1323 
1324  # Point to the main modeling script
1325  loc = ihm.location.WorkflowFileLocation(
1326  path=self._main_script,
1327  details="The main integrative modeling script")
1328  self.system.locations.append(loc)
1329 
1330  self._states = {}
1331  self.__asym_states = {}
1332  self._working_directory = os.getcwd()
1333  self.default_representation = self.create_representation(
1334  "Default representation")
1335  self.entities = _EntityMapper(self.system)
1336  # Mapping from component names to ihm.AsymUnit
1337  self.asym_units = {}
1338  self._all_components = {}
1339  self.all_modeled_components = []
1340  self._transformed_components = []
1341  self.sequence_dict = {}
1342 
1343  # Common geometric objects used in PMI systems
1344  self._xy_plane = ihm.geometry.XYPlane()
1345  self._xz_plane = ihm.geometry.XZPlane()
1346  self._z_axis = ihm.geometry.ZAxis()
1347  self._center_origin = ihm.geometry.Center(0, 0, 0)
1348  self._identity_transform = ihm.geometry.Transformation.identity()
1349 
1350  # Coordinates to exclude
1351  self._exclude_coords = {}
1352 
1353  self.all_representations = _AllModelRepresentations(self)
1354  self.all_protocols = _AllProtocols(self)
1355  self.all_datasets = _AllDatasets(self.system)
1356  self.all_starting_models = _AllStartingModels(self)
1357 
1358  self.all_software = _AllSoftware(self.system)
1359 
1360  def create_representation(self, name):
1361  """Create a new Representation and return it. This can be
1362  passed to add_model(), add_bead_element() or add_pdb_element()."""
1363  r = ihm.representation.Representation(name=name)
1364  self.system.orphan_representations.append(r)
1365  return r
1366 
1367  def exclude_coordinates(self, component, seqrange):
1368  """Don't record coordinates for the given domain.
1369  Coordinates for the given domain (specified by a component name
1370  and a 2-element tuple giving the start and end residue numbers)
1371  will be excluded from the mmCIF file. This can be used to exclude
1372  parts of the structure that weren't well resolved in modeling.
1373  Any bead or residue that lies wholly within this range will be
1374  excluded. Multiple ranges for a given component can be excluded
1375  by calling this method multiple times."""
1376  if component not in self._exclude_coords:
1377  self._exclude_coords[component] = []
1378  self._exclude_coords[component].append(seqrange)
1379 
1380  def _is_excluded(self, component, start, end):
1381  """Return True iff this chunk of sequence should be excluded"""
1382  for seqrange in self._exclude_coords.get(component, ()):
1383  if start >= seqrange[0] and end <= seqrange[1]:
1384  return True
1385 
1386  def _add_state(self, state):
1387  """Create a new state and return a pointer to it."""
1388  self._state_ensemble_offset = len(self.system.ensembles)
1389  s = _State(state, self)
1390  self._state_group.append(s)
1391  self._last_state = s
1392  return s
1393 
1394  def _get_chain_for_component(self, name, output):
1395  """Get the chain ID for a component, if any."""
1396  # todo: handle multiple copies
1397  if name in self.asym_units:
1398  return self.asym_units[name]._id
1399  else:
1400  # A non-modeled component doesn't have a chain ID
1401  return None
1402 
1403  def _get_assembly_comps(self, assembly):
1404  """Get the names of the components in the given assembly"""
1405  # component name = asym_unit.details
1406  comps = {}
1407  for ca in assembly:
1408  comps[ca.details] = None
1409  return comps
1410 
1411  def create_transformed_component(self, state, name, original, transform):
1412  """Make a new component that's a transformed copy of another.
1413  All representation for the existing component is copied to the
1414  new one."""
1415  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1416  if name in assembly_comps:
1417  raise ValueError("Component %s already exists" % name)
1418  elif original not in assembly_comps:
1419  raise ValueError("Original component %s does not exist" % original)
1420  self.create_component(state, name, True)
1421  self.add_component_sequence(state, name, self.sequence_dict[original])
1422  self._transformed_components.append(_TransformedComponent(
1423  name, original, transform))
1424  self.all_representations.copy_component(state, name, original,
1425  self.asym_units[name])
1426 
1427  def create_component(self, state, name, modeled, asym_name=None):
1428  if asym_name is None:
1429  asym_name = name
1430  new_comp = name not in self._all_components
1431  self._all_components[name] = None
1432  if modeled:
1433  state.all_modeled_components.append(name)
1434  if asym_name not in self.asym_units:
1435  # assign asym once we get sequence
1436  self.asym_units[asym_name] = None
1437  if new_comp:
1438  self.all_modeled_components.append(name)
1439 
1440  def add_component_sequence(self, state, name, seq, asym_name=None,
1441  alphabet=None, uniprot=None):
1442  if asym_name is None:
1443  asym_name = name
1444 
1445  if name in self.sequence_dict:
1446  if self.sequence_dict[name] != seq:
1447  raise ValueError("Sequence mismatch for component %s" % name)
1448  else:
1449  self.sequence_dict[name] = seq
1450  # Offset is always zero to start with; this may be modified
1451  # later in finalize_build() if any non-modeled N-terminal
1452  # residues are removed
1453  self.entities.add(name, seq, 0, alphabet, uniprot)
1454  if asym_name in self.asym_units:
1455  if self.asym_units[asym_name] is None:
1456  # Set up a new asymmetric unit for this component
1457  entity = self.entities[name]
1458  asym = AsymUnit(entity, details=asym_name)
1459  self.system.asym_units.append(asym)
1460  self.asym_units[asym_name] = asym
1461  state.modeled_assembly.append(self.asym_units[asym_name])
1462 
1463  def finalize_build(self):
1464  """Called immediately after the PMI system is built"""
1465  for entity in self.system.entities:
1466  _trim_unrep_termini(entity, self.system.asym_units,
1467  self.system.orphan_representations)
1468 
1469  def _add_restraint_model_fits(self):
1470  """Add fits to restraints for all known models"""
1471  for group, m in self.system._all_models():
1472  if m._is_restrained:
1473  for r in self.system.restraints:
1474  if hasattr(r, 'add_fits_from_model_statfile'):
1475  r.add_fits_from_model_statfile(m)
1476 
1477  def finalize(self):
1478  """Do any final processing on the class hierarchy.
1479  After calling this method, the `system` member (an instance
1480  of `ihm.System`) completely reproduces the PMI modeling, and
1481  can be written out to an mmCIF file with `ihm.dumper.write`,
1482  and/or modified using the ihm API."""
1483  self._add_restraint_model_fits()
1484 
1485  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1486  chain, hier, representation=None):
1487  if self._is_excluded(name, start, end):
1488  return
1489  if representation is None:
1490  representation = self.default_representation
1491  asym = self.asym_units[name]
1492  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1493  hier, asym)
1494  self.all_representations.add_fragment(state, representation, p)
1495  self.all_starting_models.add_pdb_fragment(p)
1496 
1497  def add_bead_element(self, state, name, start, end, num, hier,
1498  representation=None):
1499  if self._is_excluded(name, start, end):
1500  return
1501  if representation is None:
1502  representation = self.default_representation
1503  asym = self.asym_units[name]
1504  pmi_offset = asym.entity.pmi_offset
1505  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1506  num, hier, asym)
1507  self.all_representations.add_fragment(state, representation, b)
1508 
1509  def get_cross_link_group(self, pmi_restraint):
1510  r = _CrossLinkRestraint(pmi_restraint)
1511  self.system.restraints.append(r)
1512  self._add_restraint_dataset(r) # so that all-dataset group works
1513  return r
1514 
1515  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1516  if c1 not in self._all_components or c2 not in self._all_components:
1517  # Crosslink refers to a component we didn't model
1518  # As a quick hack, just ignore it.
1519  # todo: need to add an entity for this anyway (so will need the
1520  # sequence, and add to struct_assembly)
1521  return None
1522  e1 = self.entities[c1]
1523  e2 = self.entities[c2]
1524  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1525  residue2=e2.pmi_residue(r2))
1526  rsr.experimental_cross_links.append([xl])
1527  return xl
1528 
1529  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1530  psi_p, rsr):
1531  # Map p1/p2 to asym units
1532  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1533  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1534 
1535  if _get_by_residue(p1) and _get_by_residue(p2):
1536  cls = _ResidueCrossLink
1537  else:
1538  cls = _FeatureCrossLink
1539  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1540  restrain_all=False)
1541  # Needed to look up fits later
1542  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1543  rsr.cross_links.append(xl)
1544 
1545  def add_replica_exchange(self, state, rex):
1546  # todo: allow for metadata to say how many replicas were used in the
1547  # actual experiment, and how many independent runs were carried out
1548  # (use these as multipliers to get the correct total number of
1549  # output models)
1550  step = _ReplicaExchangeProtocolStep(state, rex)
1551  step.software = self.all_software.pmi
1552  self.all_protocols.add_step(step, state)
1553 
1554  def _add_simple_dynamics(self, num_models_end, method):
1555  # Always assumed that we're dealing with the last state
1556  state = self._last_state
1557  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1558  method), state)
1559 
1560  def _add_protocol(self):
1561  # Always assumed that we're dealing with the last state
1562  state = self._last_state
1563  self.all_protocols.add_protocol(state)
1564 
1565  def _add_dataset(self, dataset):
1566  return self.all_datasets.add(self._last_state, dataset)
1567 
1568  def _add_restraint_dataset(self, restraint):
1569  return self.all_datasets.add_restraint(self._last_state, restraint)
1570 
1571  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1572  # Always assumed that we're dealing with the last state
1573  state = self._last_state
1574  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1575  self.all_protocols.add_postproc(pp, state)
1576  return pp
1577 
1578  def _add_no_postprocessing(self, num_models):
1579  # Always assumed that we're dealing with the last state
1580  state = self._last_state
1581  pp = ihm.analysis.EmptyStep()
1582  pp.num_models_begin = pp.num_models_end = num_models
1583  self.all_protocols.add_postproc(pp, state)
1584  return pp
1585 
1586  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1587  num_models_deposited, localization_densities,
1588  ensemble_file):
1589  """Add an ensemble generated by ad hoc methods (not using PMI).
1590  This is currently only used by the Nup84 system."""
1591  # Always assumed that we're dealing with the last state
1592  state = self._last_state
1593  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1594  state.add_model_group(group)
1595  if ensemble_file:
1596  self.system.locations.append(ensemble_file)
1597  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1598  ensemble_file)
1599  self.system.ensembles.append(e)
1600  for c in state.all_modeled_components:
1601  den = localization_densities.get(c, None)
1602  if den:
1603  e.load_localization_density(state, c, self.asym_units[c], den)
1604  return e
1605 
1606  def set_ensemble_file(self, i, location):
1607  """Point a previously-created ensemble to an 'all-models' file.
1608  This could be a trajectory such as DCD, an RMF, or a multimodel
1609  PDB file."""
1610  self.system.locations.append(location)
1611  # Ensure that we point to an ensemble related to the current state
1612  ind = i + self._state_ensemble_offset
1613  self.system.ensembles[ind].file = location
1614 
1615  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1616  # todo: add prefilter as an additional postprocess step (complication:
1617  # we don't know how many models it filtered)
1618  # todo: postpone rmsf/density localization extraction until after
1619  # relevant methods are called (currently it is done from the
1620  # constructor)
1621  protocol = self.all_protocols.get_last_protocol(state)
1622  num_models = protocol.steps[-1].num_models_end
1623  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1624  pp.software = self.all_software.pmi
1625  self.all_protocols.add_postproc(pp, state)
1626  for i in range(rex._number_of_clusters):
1627  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1628  'cluster %d' % (i + 1)))
1629  state.add_model_group(group)
1630  # todo: make # of models to deposit configurable somewhere
1631  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1632  self.system.ensembles.append(e)
1633  # Add localization density info if available
1634  for fname, stuple in sorted(density_custom_ranges.items()):
1635  e.load_localization_density(state, fname, stuple,
1636  self.asym_units)
1637  for stats in e.load_all_models(self, state):
1638  m = self.add_model(group)
1639  # Since we currently only deposit 1 model, it is the
1640  # best scoring one
1641  m.name = 'Best scoring model'
1642  m.stats = stats
1643  # Add RMSF info if available
1644  for c in state.all_modeled_components:
1645  e.load_rmsf(m, c)
1646 
1647  def _get_subassembly(self, comps, name, description):
1648  """Get an Assembly consisting of the given components.
1649  `compdict` is a dictionary of the components to add, where keys
1650  are the component names and values are the sequence ranges (or
1651  None to use all residues in the component)."""
1652  asyms = []
1653  for comp, seqrng in comps.items():
1654  a = self.asym_units[comp]
1655  asyms.append(a if seqrng is None else a(*seqrng))
1656 
1657  a = ihm.Assembly(asyms, name=name, description=description)
1658  return a
1659 
1660  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1661  details):
1662  """Add a basic FoXS fit. This is largely intended for use from the
1663  NPC application."""
1664  assembly = self._get_subassembly(
1665  {comp: seqrange},
1666  name="SAXS subassembly",
1667  description="All components that fit SAXS data")
1668  r = ihm.restraint.SASRestraint(
1669  dataset, assembly, segment=False,
1670  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1671  multi_state=False, radius_of_gyration=rg, details=details)
1672  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1673  self.system.restraints.append(r)
1674  self._add_restraint_dataset(r) # so that all-dataset group works
1675 
1676  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1677  image_resolution, projection_number,
1678  micrographs_number):
1679  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1680  image_resolution, projection_number,
1681  micrographs_number)
1682  self.system.restraints.append(r)
1683  self._add_restraint_dataset(r) # so that all-dataset group works
1684 
1685  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1686  # todo: need to set allow_duplicates on this dataset?
1687  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1688  self.system.restraints.append(r)
1689  self._add_restraint_dataset(r) # so that all-dataset group works
1690 
1691  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1692  sigma, pmi_restraint):
1693  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1694  sigma, pmi_restraint, self._xy_plane)
1695 
1696  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1697  sigma, pmi_restraint):
1698  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1699  sigma, pmi_restraint, self._xz_plane)
1700 
1701  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1702  sigma, pmi_restraint):
1703  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1704  sigma, pmi_restraint, self._z_axis)
1705 
1706  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1707  sigma, pmi_restraint, geom):
1708  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1709  r = _GeometricRestraint(
1710  self, state, pmi_restraint, geom, asym.get_feature(ps),
1711  ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1712  upper_bound),
1713  sigma)
1714  self.system.restraints.append(r)
1715  self._add_restraint_dataset(r) # so that all-dataset group works
1716 
1717  def _get_membrane(self, tor_R, tor_r, tor_th):
1718  """Get an object representing a half-torus membrane"""
1719  if not hasattr(self, '_seen_membranes'):
1720  self._seen_membranes = {}
1721  # If a membrane already exists with all dimensions within 0.01 of
1722  # this one, reuse it
1723  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1724  if membrane_id not in self._seen_membranes:
1725  m = ihm.geometry.HalfTorus(
1726  center=self._center_origin,
1727  transformation=self._identity_transform,
1728  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1729  inner=True, name='Membrane')
1730  self._seen_membranes[membrane_id] = m
1731  return self._seen_membranes[membrane_id]
1732 
1733  def add_membrane_surface_location_restraint(
1734  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1735  self._add_membrane_restraint(
1736  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1737  ihm.restraint.UpperBoundDistanceRestraint(0.))
1738 
1739  def add_membrane_exclusion_restraint(
1740  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1741  self._add_membrane_restraint(
1742  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1743  ihm.restraint.LowerBoundDistanceRestraint(0.))
1744 
1745  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1746  sigma, pmi_restraint, rsr):
1747  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1748  r = _GeometricRestraint(
1749  self, state, pmi_restraint,
1750  self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1751  rsr, sigma)
1752  self.system.restraints.append(r)
1753  self._add_restraint_dataset(r) # so that all-dataset group works
1754 
1755  def add_model(self, group, assembly=None, representation=None):
1756  state = self._last_state
1757  if representation is None:
1758  representation = self.default_representation
1759  protocol = self.all_protocols.get_last_protocol(state)
1760  m = _Model(state.prot, self, protocol,
1761  assembly if assembly else state.modeled_assembly,
1762  representation)
1763  group.append(m)
1764  return m
1765 
1766 
1768  """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1769  This returns a list of custom dumpers that can be passed as all or
1770  part of the `dumpers` argument to ihm.dumper.write(). They add
1771  PMI-specific information to mmCIF or BinaryCIF files written out
1772  by python-ihm."""
1773  return [_ReplicaExchangeProtocolDumper]
1774 
1775 
1777  """Get custom python-ihm handlers for reading PMI data from mmCIF.
1778  This returns a list of custom handlers that can be passed as all or
1779  part of the `handlers` argument to ihm.reader.read(). They read
1780  PMI-specific information from mmCIF or BinaryCIF files read in
1781  by python-ihm."""
1782  return [_ReplicaExchangeProtocolHandler]
1783 
1784 
1785 class GMMParser(ihm.metadata.Parser):
1786  """Extract metadata from an EM density GMM file."""
1787 
1788  def parse_file(self, filename):
1789  """Extract metadata from `filename`.
1790  @return a dict with key `dataset` pointing to the GMM file and
1791  `number_of_gaussians` to the number of GMMs (or None)"""
1792  loc = ihm.location.InputFileLocation(
1793  filename,
1794  details="Electron microscopy density map, "
1795  "represented as a Gaussian Mixture Model (GMM)")
1796  # A 3DEM restraint's dataset ID uniquely defines the mmCIF
1797  # restraint, so we need to allow duplicates
1798  loc._allow_duplicates = True
1799  d = ihm.dataset.EMDensityDataset(loc)
1800  ret = {'dataset': d, 'number_of_gaussians': None}
1801 
1802  with open(filename) as fh:
1803  for line in fh:
1804  if line.startswith('# data_fn: '):
1805  p = ihm.metadata.MRCParser()
1806  fn = line[11:].rstrip('\r\n')
1807  dataset = p.parse_file(os.path.join(
1808  os.path.dirname(filename), fn))['dataset']
1809  ret['dataset'].parents.append(dataset)
1810  elif line.startswith('# ncenters: '):
1811  ret['number_of_gaussians'] = int(line[12:])
1812  return ret
1813 
1814 
1815 def _trim_unrep_termini(entity, asyms, representations):
1816  """Trim Entity sequence to only cover represented residues.
1817 
1818  PDB policy is for amino acid Entity sequences to be polymers (so
1819  they should include any loops or other gaps in the midst of the
1820  sequence) but for the termini to be trimmed of any not-modeled
1821  residues. Here, we modify the Entity sequence to remove any parts
1822  that are not included in any representation. This may change the
1823  numbering if any N-terminal residues are removed, and thus the offset
1824  between PMI and IHM numbering, as both count from 1."""
1825  rep_range = None
1826  for rep in representations:
1827  for seg in rep:
1828  if seg.asym_unit.entity is entity:
1829  seg_range = seg.asym_unit.seq_id_range
1830  if rep_range is None:
1831  rep_range = list(seg_range)
1832  else:
1833  rep_range[0] = min(rep_range[0], seg_range[0])
1834  rep_range[1] = max(rep_range[1], seg_range[1])
1835  # Do nothing if no representations or if everything is represented
1836  if rep_range is None or rep_range == [1, len(entity.sequence)]:
1837  return
1838  # Update offset (= number of unrepresented N terminal entity residues)
1839  # in entity and all asyms
1840  # Force offset to be a real Python int (not numpy.int64) as python-ihm
1841  # up to version 1.8 requires auth_seq_id_map to be int.
1842  pmi_offset = int(rep_range[0]) - 1
1843  entity.pmi_offset = pmi_offset
1844  for asym in asyms:
1845  if asym.entity is entity:
1846  asym.auth_seq_id_map = entity.pmi_offset
1847  # Modify entity sequence to only include represented residues
1848  entity.sequence = entity.sequence[rep_range[0] - 1:rep_range[1]]
1849 
1850  # If N terminal deletions, we must fix the numbering of all segments
1851  # and starting models
1852  if pmi_offset != 0:
1853  for rep in representations:
1854  for seg in rep:
1855  if seg.asym_unit.entity is entity:
1856  seg_range = seg.asym_unit.seq_id_range
1857  seg.asym_unit.seq_id_range = (seg_range[0] - pmi_offset,
1858  seg_range[1] - pmi_offset)
1859  if seg.starting_model:
1860  model = seg.starting_model
1861  seg_range = model.asym_unit.seq_id_range
1862  model.asym_unit.seq_id_range = \
1863  (seg_range[0] - pmi_offset,
1864  seg_range[1] - pmi_offset)
1865  model.offset = model.offset - pmi_offset
Select non water and non hydrogen atoms.
Definition: pdb.h:314
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
Definition: mmcif.py:1776
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1360
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1411
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1370
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
def add_uniprot_reference
Add UniProt accession (if available) to the IHM system.
Definition: mmcif.py:1246
def finalize_build
Called immediately after the PMI system is built.
Definition: mmcif.py:1463
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1311
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1789
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1235
Miscellaneous utilities.
Definition: pmi/tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1785
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:749
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1281
A single asymmetric unit in the system.
Definition: mmcif.py:1265
A single entity in the system.
Definition: mmcif.py:1202
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1606
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1285
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1477
Base class for capturing a modeling protocol.
Definition: output.py:41
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Definition: mmcif.py:1767
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
3D rotation class.
Definition: Rotation3D.h:52
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1231
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...