IMP logo
IMP Reference Guide  2.22.0
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-IHM](https://pdb-ihm.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 import copy
13 import RMF
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
19 import IMP.pmi.tools
20 import IMP.pmi.alphabets
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import weakref
29 import ihm.location
30 import ihm.dataset
31 import ihm.dumper
32 import ihm.reader
33 import ihm.metadata
34 import ihm.startmodel
35 import ihm.model
36 import ihm.protocol
37 import ihm.analysis
38 import ihm.representation
39 import ihm.geometry
40 import ihm.cross_linkers
41 import ihm.reference
42 
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper:
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot,
69  multichar_chain=True, mmcif=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper:
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81  self._seen_ranges = {}
82 
83  def __getitem__(self, p):
84  protname = self._cm[p]
85  return self.simo.asym_units[protname]
86 
87  def get_feature(self, ps):
88  """Get an ihm.restraint.Feature that covers the given particles"""
89  # todo: handle things other than residues
90  rngs = []
91  for p in ps:
92  asym = self[p]
93  # todo: handle overlapping ranges
95  rind = IMP.atom.Residue(p).get_index()
96  rng = asym(rind, rind)
98  # PMI Fragments always contain contiguous residues
99  rinds = IMP.atom.Fragment(p).get_residue_indexes()
100  rng = asym(rinds[0], rinds[-1])
101  else:
102  raise ValueError("Unsupported particle type %s" % str(p))
103  # Join contiguous ranges
104  if len(rngs) > 0 and rngs[-1].asym == asym \
105  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
107  rng.seq_id_range[1])
108  else:
109  rngs.append(rng)
110  # If an identical feature already exists, return that
111  # todo: python-ihm should handle this automatically for us
112  hrngs = tuple(rngs)
113  if hrngs in self._seen_ranges:
114  return self._seen_ranges[hrngs]
115  else:
116  feat = ihm.restraint.ResidueFeature(rngs)
117  self._seen_ranges[hrngs] = feat
118  return feat
119 
120 
121 class _AllSoftware:
122  def __init__(self, system):
123  self.system = system
124  self.modeller_used = self.phyre2_used = False
125  self.pmi = ihm.Software(
126  name="IMP PMI module",
127  version=IMP.pmi.__version__,
128  classification="integrative model building",
129  description="integrative model building",
130  location='https://integrativemodeling.org')
131  self.imp = ihm.Software(
132  name="Integrative Modeling Platform (IMP)",
133  version=IMP.__version__,
134  classification="integrative model building",
135  description="integrative model building",
136  location='https://integrativemodeling.org')
137  # Only recent versions of python-ihm support adding citations for
138  # software
139  if hasattr(self.imp, 'citation'):
140  javi = 'Vel\u00e1zquez-Muriel J'
141  self.imp.citation = ihm.Citation(
142  pmid='22272186',
143  title='Putting the pieces together: integrative modeling '
144  'platform software for structure determination of '
145  'macromolecular assemblies',
146  journal='PLoS Biol', volume=10, page_range='e1001244',
147  year=2012,
148  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
149  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
150  doi='10.1371/journal.pbio.1001244')
151  self.pmi.citation = ihm.Citation(
152  pmid='31396911',
153  title='Modeling Biological Complexes Using Integrative '
154  'Modeling Platform.',
155  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
156  year=2019,
157  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
158  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
159  'Sali A'],
160  doi='10.1007/978-1-4939-9608-7_15')
161  self.system.software.extend([self.pmi, self.imp])
162 
163  def set_modeller_used(self, version, date):
164  if self.modeller_used:
165  return
166  self.modeller_used = True
167  s = ihm.Software(
168  name='MODELLER', classification='comparative modeling',
169  description='Comparative modeling by satisfaction '
170  'of spatial restraints, build ' + date,
171  location='https://salilab.org/modeller/', version=version)
172  self.system.software.append(s)
173  if hasattr(s, 'citation'):
174  s.citation = ihm.Citation(
175  pmid='8254673',
176  title='Comparative protein modelling by satisfaction of '
177  'spatial restraints.',
178  journal='J Mol Biol', volume=234, page_range=(779, 815),
179  year=1993, authors=['Sali A', 'Blundell TL'],
180  doi='10.1006/jmbi.1993.1626')
181 
182  def set_phyre2_used(self):
183  if self.phyre2_used:
184  return
185  self.phyre2_used = True
186  s = ihm.Software(
187  name='Phyre2', classification='protein homology modeling',
188  description='Protein Homology/analogY Recognition Engine V 2.0',
189  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
190  if hasattr(s, 'citation'):
191  s.citation = ihm.Citation(
192  pmid='25950237',
193  title='The Phyre2 web portal for protein modeling, '
194  'prediction and analysis.',
195  journal='Nat Protoc', volume=10, page_range=(845, 858),
196  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
197  'Sternberg MJ'],
198  year=2015, doi='10.1038/nprot.2015.053')
199  self.system.software.append(s)
200 
201 
202 def _get_fragment_is_rigid(fragment):
203  """Determine whether a fragment is modeled rigidly"""
204  leaves = IMP.atom.get_leaves(fragment.hier)
205  # Assume all leaves are set up as rigid/flexible in the same way
206  return IMP.core.RigidMember.get_is_setup(leaves[0])
207 
208 
209 class _PDBFragment(ihm.representation.ResidueSegment):
210  """Record details about part of a PDB file used as input
211  for a component."""
212  def __init__(self, state, component, start, end, pdb_offset,
213  pdbname, chain, hier, asym_unit):
214  # start, end are PMI residue indexes (not IHM)
215  super().__init__(
216  asym_unit=asym_unit.pmi_range(start, end),
217  rigid=None, primitive='sphere')
218  self.component, self.start, self.end, self.offset, self.pdbname \
219  = component, start, end, pdb_offset, pdbname
220  self.state, self.chain, self.hier = state, chain, hier
222  & IMP.atom.ChainPDBSelector([chain])
223  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
224 
225  rigid = property(lambda self: _get_fragment_is_rigid(self),
226  lambda self, val: None)
227 
228  def combine(self, other):
229  pass
230 
231 
232 class _BeadsFragment(ihm.representation.FeatureSegment):
233  """Record details about beads used to represent part of a component."""
234  chain = None
235 
236  def __init__(self, state, component, start, end, count, hier, asym_unit):
237  super().__init__(
238  asym_unit=asym_unit(start, end), rigid=None, primitive='sphere',
239  count=count)
240  self.state, self.component, self.hier = state, component, hier
241 
242  rigid = property(lambda self: _get_fragment_is_rigid(self),
243  lambda self, val: None)
244 
245  def combine(self, other):
246  # todo: don't combine if one fragment is rigid and the other flexible
247  if (type(other) == type(self) and # noqa: E721
248  other.asym_unit.seq_id_range[0]
249  == self.asym_unit.seq_id_range[1] + 1):
250  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
251  other.asym_unit.seq_id_range[1])
252  self.count += other.count
253  return True
254 
255 
256 class _AllModelRepresentations:
257  def __init__(self, simo):
258  self.simo = simo
259  # dict of fragments, ordered by representation, then component name,
260  # then state
261  self.fragments = OrderedDict()
262  self._all_representations = {}
263 
264  def copy_component(self, state, name, original, asym_unit):
265  """Copy all representation for `original` in `state` to `name`"""
266  def copy_frag(f):
267  newf = copy.copy(f)
268  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
269  return newf
270  for rep in self.fragments:
271  if original in self.fragments[rep]:
272  if name not in self.fragments[rep]:
273  self.fragments[rep][name] = OrderedDict()
274  self.fragments[rep][name][state] = [
275  copy_frag(f) for f in self.fragments[rep][original][state]]
276  # Assume representation for a component is the same in all
277  # states, so only write out the first one
278  first_state = list(self.fragments[rep][name].keys())[0]
279  if state is first_state:
280  representation = self._all_representations[rep]
281  representation.extend(self.fragments[rep][name][state])
282 
283  def add_fragment(self, state, representation, fragment):
284  """Add a model fragment."""
285  comp = fragment.component
286  id_rep = id(representation)
287  self._all_representations[id_rep] = representation
288  if id_rep not in self.fragments:
289  self.fragments[id_rep] = OrderedDict()
290  if comp not in self.fragments[id_rep]:
291  self.fragments[id_rep][comp] = OrderedDict()
292  if state not in self.fragments[id_rep][comp]:
293  self.fragments[id_rep][comp][state] = []
294  fragments = self.fragments[id_rep][comp][state]
295  if len(fragments) == 0 or not fragments[-1].combine(fragment):
296  fragments.append(fragment)
297  # Assume representation for a component is the same in all states,
298  # so only write out the first one
299  first_state = list(self.fragments[id_rep][comp].keys())[0]
300  if state is first_state:
301  representation.append(fragment)
302 
303 
304 class _AllDatasets:
305  """Track all datasets generated by PMI and add them to the ihm.System"""
306  def __init__(self, system):
307  self.system = system
308  self._datasets = []
309  self._datasets_by_state = {}
310  self._restraints_by_state = {}
311 
312  def get_all_group(self, state):
313  """Get a DatasetGroup encompassing all datasets so far in this state"""
314  # Note that if a restraint dataset is replaced after this is called,
315  # the group will still contain the old dataset - mark dataset as read
316  # only?
317  g = ihm.dataset.DatasetGroup(
318  self._datasets_by_state.get(state, [])
319  + [r.dataset for r in self._restraints_by_state.get(state, [])
320  if r.dataset])
321  return g
322 
323  def add(self, state, dataset):
324  """Add a new dataset."""
325  self._datasets.append(dataset)
326  if state not in self._datasets_by_state:
327  self._datasets_by_state[state] = []
328  self._datasets_by_state[state].append(dataset)
329  # Make sure that the dataset ends up in the mmCIF file even if nothing
330  # refers to it
331  self.system.orphan_datasets.append(dataset)
332  return dataset
333 
334  def add_restraint(self, state, restraint):
335  """Add the dataset for a restraint"""
336  if state not in self._restraints_by_state:
337  self._restraints_by_state[state] = []
338  self._restraints_by_state[state].append(restraint)
339 
340 
341 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
342  """Restrain to a set of cross-links"""
343 
344  assembly = None
345  _label_map = {'wtDSS': 'DSS', 'scDSS': 'DSS', 'scEDC': 'EDC'}
346  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
347  'EDC': ihm.cross_linkers.edc}
348 
349  def __init__(self, pmi_restraint):
350  self.pmi_restraint = pmi_restraint
351  # Use ChemDescriptor from PMI restraint if available, otherwise guess
352  # it using the label
353  linker = getattr(self.pmi_restraint, 'linker', None)
354  label = self.pmi_restraint.label
355  self.label = label
356  super().__init__(
357  dataset=self.pmi_restraint.dataset,
358  linker=linker or self._get_chem_descriptor(label))
359 
360  @classmethod
361  def _get_chem_descriptor(cls, label):
362  # Map commonly-used subtypes to more standard labels
363  label = cls._label_map.get(label, label)
364  if label not in cls._descriptor_map:
365  # If label is not a standard linker type, make a new chemical
366  # descriptor containing just the name. We don't know the chemistry
367  # so cannot specify a SMILES or INCHI string for it at this point
368  d = ihm.ChemDescriptor(label)
369  cls._descriptor_map[label] = d
370  return cls._descriptor_map[label]
371 
372  def _set_psi_sigma(self, model):
373  old_values = []
374  # Do nothing if given a different state than the restraint applies to
375  if model.m != self.pmi_restraint.model:
376  return
377  for resolution in self.pmi_restraint.sigma_dictionary:
378  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
379  if model.stats and statname in model.stats:
380  sigma = float(model.stats[statname])
381  p = self.pmi_restraint.sigma_dictionary[resolution][0]
382  old_values.append((p, p.get_scale()))
383  p.set_scale(sigma)
384  for psiindex in self.pmi_restraint.psi_dictionary:
385  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
386  if model.stats and statname in model.stats:
387  psi = float(model.stats[statname])
388  p = self.pmi_restraint.psi_dictionary[psiindex][0]
389  old_values.append((p, p.get_scale()))
390  p.set_scale(psi)
391  # If the same particle is set multiple times, make sure we get the
392  # original value last
393  return list(reversed(old_values))
394 
395  def add_fits_from_model_statfile(self, model):
396  # Set psi/sigma for all particles
397  old_values = self._set_psi_sigma(model)
398  # If no stats were collected, we can't show the fit
399  if not old_values:
400  return
401  for xl in self.cross_links:
402  # Get current psi/sigma particle value for each XL
403  xl.fits[model] = ihm.restraint.CrossLinkFit(
404  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
405  # Restore original psi/sigma
406  for p, scale in old_values:
407  p.set_scale(scale)
408 
409  # Have our dataset point to that in the original PMI restraint
410  def __set_dataset(self, val):
411  self.pmi_restraint.dataset = val
412  dataset = property(lambda self: self.pmi_restraint.dataset,
413  __set_dataset)
414 
415 
416 def get_asym_mapper_for_state(simo, state, asym_map):
417  asym = asym_map.get(state, None)
418  if asym is None:
419  asym = _AsymMapper(simo, state.prot)
420  asym_map[state] = asym
421  return asym
422 
423 
424 class _PMICrossLink:
425  # Query PMI particles to get psi and sigma values at time of use, not
426  # time of construction (since they may be set after creating the restraint)
427  psi = property(lambda self: self.psi_p.get_scale(),
428  lambda self, val: None)
429  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
430  lambda self, val: None)
431  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
432  lambda self, val: None)
433 
434 
435 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
436  pass
437 
438 
439 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
440  pass
441 
442 
443 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
444  def __init__(self, state, pmi_restraint, image_number, resolution,
445  pixel_size, image_resolution, projection_number,
446  micrographs_number):
447  self.pmi_restraint, self.image_number = pmi_restraint, image_number
448  super().__init__(
449  dataset=pmi_restraint.datasets[image_number],
450  assembly=state.modeled_assembly,
451  segment=False, number_raw_micrographs=micrographs_number,
452  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
453  image_resolution=image_resolution,
454  number_of_projections=projection_number)
455 
456  # Have our dataset point to that in the original PMI restraint
457  def __get_dataset(self):
458  return self.pmi_restraint.datasets[self.image_number]
459 
460  def __set_dataset(self, val):
461  self.pmi_restraint.datasets[self.image_number] = val
462 
463  dataset = property(__get_dataset, __set_dataset)
464 
465  def add_fits_from_model_statfile(self, model):
466  ccc = self._get_cross_correlation(model)
467  transform = self._get_transformation(model)
468  rot = transform.get_rotation()
469  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
470  self.fits[model] = ihm.restraint.EM2DRestraintFit(
471  cross_correlation_coefficient=ccc,
472  rot_matrix=rm,
473  tr_vector=transform.get_translation())
474 
475  def _get_transformation(self, model):
476  """Get the transformation that places the model on the image"""
477  stats = model.em2d_stats or model.stats
478  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
479  self.image_number + 1)
480  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
481  t = [float(stats[prefix + '_Translation%d' % i])
482  for i in range(3)]
483  # If the model coordinates are transformed, need to back transform
484  # them first
485  inv = model.transform.get_inverse()
487  IMP.algebra.Vector3D(*t)) * inv
488 
489  def _get_cross_correlation(self, model):
490  """Get the cross correlation coefficient between the model projection
491  and the image"""
492  stats = model.em2d_stats or model.stats
493  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
494  % (self.pmi_restraint.label,
495  self.image_number + 1)])
496 
497 
498 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
499 
500  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
501  self.pmi_restraint = pmi_restraint
502  super().__init__(
503  dataset=pmi_restraint.dataset,
504  assembly=self._get_assembly(densities, simo, state),
505  fitting_method='Gaussian mixture models',
506  number_of_gaussians=len(target_ps))
507 
508  # Have our dataset point to that in the original PMI restraint
509  def __set_dataset(self, val):
510  self.pmi_restraint.dataset = val
511  dataset = property(lambda self: self.pmi_restraint.dataset,
512  __set_dataset)
513 
514  def _get_assembly(self, densities, simo, state):
515  """Get the Assembly that this restraint acts on"""
516  cm = _ComponentMapper(state.prot)
517  components = {}
518  for d in densities:
519  components[cm[d]] = None # None == all residues in this component
520  a = simo._get_subassembly(
521  components, name="EM subassembly",
522  description="All components that fit the EM map")
523  return a
524 
525  def add_fits_from_model_statfile(self, model):
526  ccc = self._get_cross_correlation(model)
527  self.fits[model] = ihm.restraint.EM3DRestraintFit(
528  cross_correlation_coefficient=ccc)
529 
530  def _get_cross_correlation(self, model):
531  """Get the cross correlation coefficient between the model
532  and the map"""
533  if model.stats is not None:
534  return float(model.stats['GaussianEMRestraint_%s_CCC'
535  % self.pmi_restraint.label])
536 
537 
538 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
539 
540  def __init__(self, simo, state, pmi_restraint, geometric_object,
541  feature, distance, sigma):
542  self.pmi_restraint = pmi_restraint
543  super().__init__(
544  dataset=pmi_restraint.dataset,
545  geometric_object=geometric_object, feature=feature,
546  distance=distance, harmonic_force_constant=1. / sigma,
547  restrain_all=True)
548 
549  # Have our dataset point to that in the original PMI restraint
550  def __set_dataset(self, val):
551  self.pmi_restraint.dataset = val
552  dataset = property(lambda self: self.pmi_restraint.dataset,
553  __set_dataset)
554 
555 
556 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
557  def __init__(self, state, rex):
558  if rex.monte_carlo_sample_objects is not None:
559  method = 'Replica exchange monte carlo'
560  else:
561  method = 'Replica exchange molecular dynamics'
562  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
563  self.replica_exchange_minimum_temperature = \
564  rex.vars['replica_exchange_minimum_temperature']
565  self.replica_exchange_maximum_temperature = \
566  rex.vars['replica_exchange_maximum_temperature']
567  super().__init__(
568  assembly=state.modeled_assembly,
569  dataset_group=None, # filled in by add_step()
570  method=method, name='Sampling',
571  num_models_begin=None, # filled in by add_step()
572  num_models_end=rex.vars["number_of_frames"],
573  multi_scale=True, multi_state=False, ordered=False, ensemble=True)
574 
575 
576 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
577  """Write IMP-specific information about replica exchange to mmCIF.
578  Note that IDs will have already been assigned by python-ihm's
579  standard modeling protocol dumper."""
580  def dump(self, system, writer):
581  with writer.loop("_imp_replica_exchange_protocol",
582  ["protocol_id", "step_id", "monte_carlo_temperature",
583  "replica_exchange_minimum_temperature",
584  "replica_exchange_maximum_temperature"]) as lp:
585  for p in system._all_protocols():
586  for s in p.steps:
587  if isinstance(s, _ReplicaExchangeProtocolStep):
588  self._dump_step(p, s, lp)
589 
590  def _dump_step(self, p, s, lp):
591  mintemp = s.replica_exchange_minimum_temperature
592  maxtemp = s.replica_exchange_maximum_temperature
593  lp.write(protocol_id=p._id, step_id=s._id,
594  monte_carlo_temperature=s.monte_carlo_temperature,
595  replica_exchange_minimum_temperature=mintemp,
596  replica_exchange_maximum_temperature=maxtemp)
597 
598 
599 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
600  category = '_imp_replica_exchange_protocol'
601 
602  """Read IMP-specific information about replica exchange from mmCIF."""
603  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
604  replica_exchange_minimum_temperature,
605  replica_exchange_maximum_temperature):
606  p = self.sysr.protocols.get_by_id(protocol_id)
607  # todo: match step_id properly (don't assume contiguous)
608  s = p.steps[int(step_id)-1]
609  # Upgrade from plain ihm Step to IMP subclass
610  s.__class__ = _ReplicaExchangeProtocolStep
611  s.monte_carlo_temperature = \
612  self.get_float(monte_carlo_temperature)
613  s.replica_exchange_minimum_temperature = \
614  self.get_float(replica_exchange_minimum_temperature)
615  s.replica_exchange_maximum_temperature = \
616  self.get_float(replica_exchange_maximum_temperature)
617 
618 
619 class _SimpleProtocolStep(ihm.protocol.Step):
620  def __init__(self, state, num_models_end, method):
621  super().__init__(
622  assembly=state.modeled_assembly,
623  dataset_group=None, # filled in by add_step()
624  method=method, name='Sampling',
625  num_models_begin=None, # filled in by add_step()
626  num_models_end=num_models_end,
627  multi_scale=True, multi_state=False, ordered=False,
628  ensemble=True)
629 
630 
631 class _Chain:
632  """Represent a single chain in a Model"""
633  def __init__(self, pmi_chain_id, asym_unit):
634  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
635  self.spheres = []
636  self.atoms = []
637 
638  def add(self, xyz, atom_type, residue_type, residue_index,
639  all_indexes, radius):
640  if atom_type is None:
641  self.spheres.append((xyz, residue_type, residue_index,
642  all_indexes, radius))
643  else:
644  self.atoms.append((xyz, atom_type, residue_type, residue_index,
645  all_indexes, radius))
646  orig_comp = property(lambda self: self.comp)
647 
648 
649 class _TransformedChain:
650  """Represent a chain that is a transformed version of another"""
651  def __init__(self, orig_chain, asym_unit, transform):
652  self.orig_chain, self.asym_unit = orig_chain, asym_unit
653  self.transform = transform
654 
655  def __get_spheres(self):
656  for (xyz, residue_type, residue_index, all_indexes,
657  radius) in self.orig_chain.spheres:
658  yield (self.transform * xyz, residue_type, residue_index,
659  all_indexes, radius)
660  spheres = property(__get_spheres)
661 
662  def __get_atoms(self):
663  for (xyz, atom_type, residue_type, residue_index, all_indexes,
664  radius) in self.orig_chain.atoms:
665  yield (self.transform * xyz, atom_type, residue_type,
666  residue_index, all_indexes, radius)
667  atoms = property(__get_atoms)
668 
669  entity = property(lambda self: self.orig_chain.entity)
670  orig_comp = property(lambda self: self.orig_chain.comp)
671 
672 
673 class _Excluder:
674  def __init__(self, component, simo):
675  self._seqranges = simo._exclude_coords.get(component, [])
676 
677  def is_excluded(self, indexes):
678  """Return True iff the given sequence range is excluded."""
679  for seqrange in self._seqranges:
680  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
681  return True
682 
683 
684 class _Model(ihm.model.Model):
685  def __init__(self, prot, simo, protocol, assembly, representation):
686  super().__init__(assembly=assembly, protocol=protocol,
687  representation=representation)
688  self.simo = weakref.proxy(simo)
689  # Transformation from IMP coordinates into mmCIF coordinate system.
690  # Normally we pass through coordinates unchanged, but in some cases
691  # we may want to translate them (e.g. Nup84, where the deposited PDB
692  # files are all centered; we want the mmCIF files to match)
694  self.em2d_stats = None
695  self.stats = None
696  # True iff restraints act on this model
697  self._is_restrained = True
698  o = self.output = IMP.pmi.output.Output(atomistic=True)
699  name = 'cif-output'
700  self.m = prot.get_model()
701  o.dictionary_pdbs[name] = prot
702  o._init_dictchain(name, prot, multichar_chain=True)
703  (particle_infos_for_pdb,
704  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
705  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
706  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
707  self.rmsf = {}
708 
709  def all_chains(self, simo):
710  """Yield all chains, including transformed ones"""
711  chain_for_comp = {}
712  for c in self.chains:
713  yield c
714  chain_for_comp[c.comp] = c
715  for tc in simo._transformed_components:
716  orig_chain = chain_for_comp.get(tc.original, None)
717  if orig_chain:
718  asym = simo.asym_units[tc.name]
719  c = _TransformedChain(orig_chain, asym, tc.transform)
720  c.comp = tc.name
721  yield c
722 
723  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
724  entity_for_chain = {}
725  comp_for_chain = {}
726  correct_asym = {}
727  for protname, chain_id in o.dictchain[name].items():
728  if protname in simo.entities:
729  entity_for_chain[chain_id] = simo.entities[protname]
730  else:
731  # Remove copy number
732  pn = protname.split('.')[0]
733  entity_for_chain[chain_id] = simo.entities[pn]
734  comp_for_chain[chain_id] = protname
735  # When doing multi-state modeling, the chain ID returned here
736  # (assigned sequentially) might not be correct (states may have
737  # gaps in the chain IDs). Map it to the correct asym unit.
738  correct_asym[chain_id] = simo.asym_units[protname]
739 
740  # Gather by chain ID (should be sorted by chain ID already)
741  self.chains = []
742  chain = None
743  excluder = None
744 
745  for (xyz, atom_type, residue_type, chain_id, residue_index,
746  all_indexes, radius) in particle_infos_for_pdb:
747  if chain is None or chain.pmi_chain_id != chain_id:
748  chain = _Chain(chain_id, correct_asym[chain_id])
749  chain.entity = entity_for_chain[chain_id]
750  chain.comp = comp_for_chain[chain_id]
751  self.chains.append(chain)
752  excluder = _Excluder(chain.comp, simo)
753  if not excluder.is_excluded(all_indexes if all_indexes
754  else [residue_index]):
755  chain.add(xyz, atom_type, residue_type, residue_index,
756  all_indexes, radius)
757 
758  def parse_rmsf_file(self, fname, component):
759  self.rmsf[component] = rmsf = {}
760  with open(str(fname)) as fh:
761  for line in fh:
762  resnum, blocknum, val = line.split()
763  rmsf[int(resnum)] = (int(blocknum), float(val))
764 
765  def get_rmsf(self, component, indexes):
766  """Get the RMSF value for the given residue indexes."""
767  if not self.rmsf:
768  return None
769  rmsf = self.rmsf[component]
770  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
771  if len(blocknums) != 1:
772  raise ValueError("Residue indexes %s aren't all in the same block"
773  % str(indexes))
774  return rmsf[indexes[0]][1]
775 
776  def get_atoms(self):
777  for chain in self.all_chains(self.simo):
778  pmi_offset = chain.asym_unit.entity.pmi_offset
779  for atom in chain.atoms:
780  (xyz, atom_type, residue_type, residue_index,
781  all_indexes, radius) = atom
782  pt = self.transform * xyz
783  yield ihm.model.Atom(
784  asym_unit=chain.asym_unit,
785  seq_id=residue_index - pmi_offset,
786  atom_id=atom_type.get_string(),
787  type_symbol=None, # todo: get element
788  x=pt[0], y=pt[1], z=pt[2])
789 
790  def get_spheres(self):
791  for chain in self.all_chains(self.simo):
792  pmi_offset = chain.asym_unit.entity.pmi_offset
793  for sphere in chain.spheres:
794  (xyz, residue_type, residue_index,
795  all_indexes, radius) = sphere
796  if all_indexes is None:
797  all_indexes = (residue_index,)
798  pt = self.transform * xyz
799  yield ihm.model.Sphere(
800  asym_unit=chain.asym_unit,
801  seq_id_range=(all_indexes[0] - pmi_offset,
802  all_indexes[-1] - pmi_offset),
803  x=pt[0], y=pt[1], z=pt[2], radius=radius,
804  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
805 
806 
807 class _AllProtocols:
808  def __init__(self, simo):
809  self.simo = weakref.proxy(simo)
810  # Lists of Protocols by state
811  self.protocols = OrderedDict()
812 
813  def add_protocol(self, state):
814  """Add a new Protocol"""
815  if state not in self.protocols:
816  self.protocols[state] = []
817  p = ihm.protocol.Protocol()
818  self.simo.system.orphan_protocols.append(p)
819  self.protocols[state].append(p)
820 
821  def add_step(self, step, state):
822  """Add a ProtocolStep to the last Protocol of the given State"""
823  if state not in self.protocols:
824  self.add_protocol(state)
825  protocol = self.get_last_protocol(state)
826  if len(protocol.steps) == 0:
827  step.num_models_begin = 0
828  else:
829  step.num_models_begin = protocol.steps[-1].num_models_end
830  protocol.steps.append(step)
831  step.id = len(protocol.steps)
832  # Assume that protocol uses all currently-defined datasets
833  step.dataset_group = self.simo.all_datasets.get_all_group(state)
834 
835  def add_postproc(self, step, state):
836  """Add a postprocessing step to the last protocol"""
837  protocol = self.get_last_protocol(state)
838  if len(protocol.analyses) == 0:
839  protocol.analyses.append(ihm.analysis.Analysis())
840  protocol.analyses[-1].steps.append(step)
841 
842  def get_last_protocol(self, state):
843  """Return the most recently-added _Protocol"""
844  return self.protocols[state][-1]
845 
846 
847 class _AllStartingModels:
848  def __init__(self, simo):
849  self.simo = simo
850  # dict of starting models (entire PDB files), collected from fragments,
851  # ordered by component name and state
852  self.models = OrderedDict()
853  self.output = IMP.pmi.output.Output()
854 
855  def add_pdb_fragment(self, fragment):
856  """Add a starting model PDB fragment."""
857  comp = fragment.component
858  state = fragment.state
859  if comp not in self.models:
860  self.models[comp] = OrderedDict()
861  if state not in self.models[comp]:
862  self.models[comp][state] = []
863  models = self.models[comp][state]
864  if len(models) == 0 \
865  or models[-1].fragments[0].pdbname != fragment.pdbname:
866  model = self._add_model(fragment)
867  models.append(model)
868  else:
869  # Break circular ref between fragment and model
870  models[-1].fragments.append(weakref.proxy(fragment))
871  # Update residue range to cover all fragments
872  # We don't use fragment.offset here because fragment numbering
873  # is already in seq_id, not PDB numbering
874  pmi_offset = models[-1].asym_unit.entity.pmi_offset
875  sid_begin = min(fragment.start - pmi_offset,
876  models[-1].asym_unit.seq_id_range[0])
877  sid_end = max(fragment.end - pmi_offset,
878  models[-1].asym_unit.seq_id_range[1])
879  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
880  fragment.starting_model = models[-1]
881 
882  def _add_model(self, f):
883  parser = ihm.metadata.PDBParser()
884  r = parser.parse_file(f.pdbname)
885 
886  self.simo._add_dataset(r['dataset'])
887  # We only want the templates that model the starting model chain
888  templates = r['templates'].get(f.chain, [])
889  for t in templates:
890  if t.alignment_file:
891  self.simo.system.locations.append(t.alignment_file)
892  if t.dataset:
893  self.simo._add_dataset(t.dataset)
894  source = r['entity_source'].get(f.chain)
895  if source:
896  f.asym_unit.entity.source = source
897  pmi_offset = f.asym_unit.entity.pmi_offset
898  m = _StartingModel(
899  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
900  dataset=r['dataset'], asym_id=f.chain,
901  templates=templates, offset=f.offset + pmi_offset,
902  metadata=r['metadata'],
903  software=r['software'][0] if r['software'] else None,
904  script_file=r['script'])
905  m.fragments = [weakref.proxy(f)]
906  return m
907 
908 
909 class _StartingModel(ihm.startmodel.StartingModel):
910  def get_seq_dif(self):
911  return self._seq_dif
912 
913  def get_atoms(self):
914  pmi_offset = self.asym_unit.entity.pmi_offset
915  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
916  self.asym_unit)
917  for f in self.fragments:
918  sel = IMP.atom.Selection(
919  f.starting_hier,
920  residue_indexes=list(range(f.start - f.offset,
921  f.end - f.offset + 1)))
922  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
923  f.offset - pmi_offset):
924  yield a
925  self._seq_dif = mh._seq_dif
926 
927 
928 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
929  """Post processing using AnalysisReplicaExchange0 macro"""
930 
931  def __init__(self, rex, num_models_begin):
932  self.rex = rex
933  num_models_end = 0
934  for fname in self.get_all_stat_files():
935  with open(str(fname)) as fh:
936  num_models_end += len(fh.readlines())
937  super().__init__(
938  feature='RMSD', num_models_begin=num_models_begin,
939  num_models_end=num_models_end)
940 
941  def get_stat_file(self, cluster_num):
942  return self.rex._outputdir / ("cluster.%d" % cluster_num) / 'stat.out'
943 
944  def get_all_stat_files(self):
945  for i in range(self.rex._number_of_clusters):
946  yield self.get_stat_file(i)
947 
948 
949 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
950  """Ensemble generated using AnalysisReplicaExchange0 macro"""
951 
952  num_models_deposited = None # Override base class property
953 
954  def __init__(self, pp, cluster_num, model_group, num_deposit):
955  with open(str(pp.get_stat_file(cluster_num))) as fh:
956  num_models = len(fh.readlines())
957  super().__init__(
958  num_models=num_models,
959  model_group=model_group, post_process=pp,
960  clustering_feature=pp.feature,
961  name=model_group.name)
962  self.cluster_num = cluster_num
963  self.num_models_deposited = num_deposit
964 
965  def get_rmsf_file(self, component):
966  return (self.post_process.rex._outputdir
967  / ('cluster.%d' % self.cluster_num)
968  / ('rmsf.%s.dat' % component))
969 
970  def load_rmsf(self, model, component):
971  fname = self.get_rmsf_file(component)
972  if fname.exists():
973  model.parse_rmsf_file(fname, component)
974 
975  def get_localization_density_file(self, fname):
976  return (self.post_process.rex._outputdir
977  / ('cluster.%d' % self.cluster_num)
978  / ('%s.mrc' % fname))
979 
980  def load_localization_density(self, state, fname, select_tuple,
981  asym_units):
982  fullpath = self.get_localization_density_file(fname)
983  if fullpath.exists():
984  details = "Localization density for %s %s" \
985  % (fname, self.model_group.name)
986  local_file = ihm.location.OutputFileLocation(str(fullpath),
987  details=details)
988  for s in select_tuple:
989  if isinstance(s, tuple) and len(s) == 3:
990  asym = asym_units[s[2]].pmi_range(s[0], s[1])
991  else:
992  asym = asym_units[s]
993  den = ihm.model.LocalizationDensity(file=local_file,
994  asym_unit=asym)
995  self.densities.append(den)
996 
997  def load_all_models(self, simo, state):
998  stat_fname = self.post_process.get_stat_file(self.cluster_num)
999  model_num = 0
1000  with open(str(stat_fname)) as fh:
1001  stats = ast.literal_eval(fh.readline())
1002  # Correct path
1003  rmf_file = stat_fname.parent / ("%d.rmf3" % model_num)
1004  # todo: test with real PMI2 systems
1005  if rmf_file.exists():
1006  rh = RMF.open_rmf_file_read_only(str(rmf_file))
1007  system = state._pmi_object.system
1008  IMP.rmf.link_hierarchies(rh, [system.hier])
1009  IMP.rmf.load_frame(fh, RMF.FrameID(0))
1010  # todo: fill in other data from stat file, e.g. cross-link phi/psi
1011  yield stats
1012  model_num += 1
1013  if model_num >= self.num_models_deposited:
1014  return
1015 
1016  # todo: also support dRMS precision
1017  def _get_precision(self):
1018  precfile = (self.post_process.rex._outputdir /
1019  ("precision.%d.%d.out" % (self.cluster_num,
1020  self.cluster_num)))
1021  if not precfile.exists():
1022  return ihm.unknown
1023  # Fail if the precision.x.x.out file doesn't match the cluster
1024  r = re.compile(
1025  r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
1026  % self.cluster_num)
1027  with open(str(precfile)) as fh:
1028  for line in fh:
1029  m = r.match(line)
1030  if m:
1031  return float(m.group(1))
1032 
1033  precision = property(lambda self: self._get_precision(),
1034  lambda self, val: None)
1035 
1036 
1037 class _SimpleEnsemble(ihm.model.Ensemble):
1038  """Simple manually-created ensemble"""
1039 
1040  num_models_deposited = None # Override base class property
1041 
1042  def __init__(self, pp, model_group, num_models, drmsd,
1043  num_models_deposited, ensemble_file):
1044  super().__init__(
1045  model_group=model_group, post_process=pp, num_models=num_models,
1046  file=ensemble_file, precision=drmsd, name=model_group.name,
1047  clustering_feature='dRMSD')
1048  self.num_models_deposited = num_models_deposited
1049 
1050  def load_localization_density(self, state, component, asym, local_file):
1051  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1052  self.densities.append(den)
1053 
1054 
1055 class _CustomDNAAlphabet:
1056  """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1057  as in python-ihm)"""
1058  _comps = dict([cc.code_canonical, cc]
1059  for cc in ihm.DNAAlphabet._comps.values())
1060 
1061 
1062 class _EntityMapper(dict):
1063  """Handle mapping from IMP components (without copy number) to CIF
1064  entities. Multiple components may map to the same entity if they
1065  share sequence."""
1066  def __init__(self, system):
1067  super().__init__()
1068  self._sequence_dict = {}
1069  self._entities = []
1070  self.system = system
1071 
1072  def _get_alphabet(self, alphabet):
1073  """Map a PMI alphabet to an IHM alphabet"""
1074  # todo: we should probably ignore alphabets entirely and use
1075  # residue types directly (e.g. we shouldn't compare one-letter
1076  # code sequence to determine if an Entity is unique)
1077  alphabet_map = {None: ihm.LPeptideAlphabet,
1078  IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1079  IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1080  IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1081  if alphabet in alphabet_map:
1082  return alphabet_map[alphabet]
1083  else:
1084  raise TypeError("Don't know how to handle %s" % alphabet)
1085 
1086  def add(self, component_name, sequence, offset, alphabet, uniprot):
1087  def entity_seq(sequence):
1088  # Map X to UNK
1089  if 'X' in sequence:
1090  return ['UNK' if s == 'X' else s for s in sequence]
1091  else:
1092  return sequence
1093  if sequence not in self._sequence_dict:
1094  # Use the name of the first component, stripped of any copy number,
1095  # as the description of the entity
1096  d = component_name.split("@")[0].split(".")[0]
1097  entity = Entity(entity_seq(sequence), description=d,
1098  pmi_offset=offset,
1099  alphabet=self._get_alphabet(alphabet),
1100  uniprot=uniprot)
1101  self.system.entities.append(entity)
1102  self._sequence_dict[sequence] = entity
1103  self[component_name] = self._sequence_dict[sequence]
1104 
1105 
1106 class _TransformedComponent:
1107  def __init__(self, name, original, transform):
1108  self.name, self.original, self.transform = name, original, transform
1109 
1110 
1111 class _SimpleRef:
1112  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1113  def __init__(self, ref):
1114  self.ref = ref
1115 
1116  def __call__(self):
1117  return self.ref
1118 
1119 
1120 class _State(ihm.model.State):
1121  """Representation of a single state in the system."""
1122 
1123  def __init__(self, pmi_object, po):
1124  # Point to the PMI object for this state. Use a weak reference
1125  # since the state object typically points to us too, so we need
1126  # to break the reference cycle. In PMI1 this will be a
1127  # Representation object; in PMI2 it is the PMI2 State object itself.
1128  self._pmi_object = weakref.proxy(pmi_object)
1129  if hasattr(pmi_object, 'state'):
1130  # Need a strong ref to pmi_object.state to prevent it from being
1131  # cleaned up when doing PMI1 multistate modeling
1132  self._pmi_state = _SimpleRef(pmi_object.state)
1133  else:
1134  self._pmi_state = weakref.ref(pmi_object)
1135  # Preserve PMI state name
1136  old_name = self.name
1137  super().__init__(experiment_type='Fraction of bulk')
1138  self.name = old_name
1139 
1140  # The assembly of all components modeled by IMP in this state.
1141  # This may be smaller than the complete assembly.
1142  self.modeled_assembly = ihm.Assembly(
1143  name="Modeled assembly",
1144  description=self.get_postfixed_name(
1145  "All components modeled by IMP"))
1146  po.system.orphan_assemblies.append(self.modeled_assembly)
1147 
1148  self.all_modeled_components = []
1149 
1150  def __hash__(self):
1151  return hash(self._pmi_state())
1152 
1153  def __eq__(self, other):
1154  return self._pmi_state() == other._pmi_state()
1155 
1156  def add_model_group(self, group):
1157  self.append(group)
1158  return group
1159 
1160  def get_prefixed_name(self, name):
1161  """Prefix the given name with the state name, if available."""
1162  if self.short_name:
1163  return self.short_name + ' ' + name
1164  else:
1165  # Can't use capitalize() since that un-capitalizes everything
1166  # but the first letter
1167  return name[0].upper() + name[1:] if name else ''
1168 
1169  def get_postfixed_name(self, name):
1170  """Postfix the given name with the state name, if available."""
1171  if self.short_name:
1172  return "%s in state %s" % (name, self.short_name)
1173  else:
1174  return name
1175 
1176  short_name = property(lambda self: self._pmi_state().short_name)
1177  long_name = property(lambda self: self._pmi_state().long_name)
1178 
1179  def __get_name(self):
1180  return self._pmi_state().long_name
1181 
1182  def __set_name(self, val):
1183  self._pmi_state().long_name = val
1184 
1185  name = property(__get_name, __set_name)
1186 
1187 
1188 class Entity(ihm.Entity):
1189  """A single entity in the system. This contains information (such as
1190  database identifiers) specific to a particular sequence rather than
1191  a copy (for example, when modeling a homodimer, two AsymUnits will
1192  point to the same Entity).
1193 
1194  This functions identically to the base ihm.Entity class, but it
1195  allows identifying residues by either the PMI numbering scheme
1196  (which is always contiguous starting at 1, covering the entire
1197  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1198  starts at 1, but which only covers the modeled subset of the full
1199  sequence, with non-modeled N-terminal or C-terminal residues
1200  removed). The actual offset (which is the integer to be added to the
1201  IHM numbering to get PMI numbering, or equivalently the number of
1202  not-represented N-terminal residues in the PMI sequence) is
1203  available in the `pmi_offset` member.
1204 
1205  If a UniProt accession was provided for the sequence (either when
1206  State.create_molecule() was called, or in the FASTA alignment file
1207  header) then that is available in the `uniprot` member, and can be
1208  added to the IHM system with the add_uniprot_reference method.
1209  """
1210  def __init__(self, sequence, pmi_offset, uniprot, *args, **keys):
1211  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1212  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1213  self.pmi_offset = pmi_offset
1214  self.uniprot = uniprot
1215  super().__init__(sequence, *args, **keys)
1216 
1217  def pmi_residue(self, res_id):
1218  """Return a single IHM residue indexed using PMI numbering"""
1219  return self.residue(res_id - self.pmi_offset)
1220 
1221  def pmi_range(self, res_id_begin, res_id_end):
1222  """Return a range of IHM residues indexed using PMI numbering"""
1223  off = self.pmi_offset
1224  return self(res_id_begin - off, res_id_end - off)
1225 
1226  def add_uniprot_reference(self):
1227  """Add UniProt accession (if available) to the IHM system.
1228  If a UniProt accession was provided for the sequence (either when
1229  State.create_molecule() was called, or in the FASTA alignment file
1230  header), then look this up at the UniProt web site (requires
1231  network access) to get full information, and add it to the IHM
1232  system. The resulting reference object is returned. If the IMP
1233  and UniProt sequences are not identical, then this object may
1234  need to be modified by specifying an alignment and/or
1235  single-point mutations.
1236  """
1237  if self.uniprot:
1238  print('Adding UniProt accession %s reference for entity %s'
1239  % (self.uniprot, self.description))
1240  ref = ihm.reference.UniProtSequence.from_accession(self.uniprot)
1241  self.references.append(ref)
1242  return ref
1243 
1244 
1245 class AsymUnit(ihm.AsymUnit):
1246  """A single asymmetric unit in the system. This roughly corresponds to
1247  a single PMI subunit (sequence, copy, or clone).
1248 
1249  This functions identically to the base ihm.AsymUnit class, but it
1250  allows identifying residues by either the PMI numbering scheme
1251  (which is always contiguous starting at 1, covering the entire
1252  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1253  starts at 1, but which only covers the modeled subset of the full
1254  sequence, with non-modeled N-terminal or C-terminal residues
1255  removed).
1256 
1257  The `entity` member of this class points to an Entity object, which
1258  contains information (such as database identifiers) specific to
1259  a particular sequence rather than a copy (for example, when modeling
1260  a homodimer, two AsymUnits will point to the same Entity).
1261  """
1262 
1263  def __init__(self, entity, *args, **keys):
1264  super().__init__(
1265  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1266 
1267  def pmi_residue(self, res_id):
1268  """Return a single IHM residue indexed using PMI numbering"""
1269  return self.residue(res_id - self.entity.pmi_offset)
1270 
1271  def pmi_range(self, res_id_begin, res_id_end):
1272  """Return a range of IHM residues indexed using PMI numbering"""
1273  off = self.entity.pmi_offset
1274  return self(res_id_begin - off, res_id_end - off)
1275 
1276 
1278  """Class to encode a modeling protocol as mmCIF.
1279 
1280  IMP has basic support for writing out files in mmCIF format, for
1281  deposition in [PDB-IHM](https://pdb-ihm.org/).
1282  After creating an instance of this class, attach it to an
1283  IMP.pmi.topology.System object. After this, any
1284  generated models and metadata are automatically collected in the
1285  `system` attribute, which is an
1286  [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1287  Once the protocol is complete, call finalize() to make sure `system`
1288  contains everything, then use the
1289  [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1290  to write out files in mmCIF or BinaryCIF format.
1291 
1292  Each PMI subunit will be mapped to an IHM AsymUnit class which contains the
1293  subset of the sequence that was represented. Use the `asym_units` dict to
1294  get this object given a PMI subunit name. Each unique sequence will be
1295  mapped to an IHM Entity class (for example when modeling a homodimer
1296  there will be two AsymUnits which both point to the same Entity). Use
1297  the `entities` dict to get this object from a PMI subunit name.
1298 
1299  See also Entity, AsymUnit, get_handlers(), get_dumpers().
1300  """ # noqa: E501
1301  def __init__(self):
1302  # Ultimately, collect data in an ihm.System object
1303  self.system = ihm.System()
1304  self._state_group = ihm.model.StateGroup()
1305  self.system.state_groups.append(self._state_group)
1306 
1307  self._state_ensemble_offset = 0
1308  self._main_script = os.path.abspath(sys.argv[0])
1309 
1310  # Point to the main modeling script
1311  loc = ihm.location.WorkflowFileLocation(
1312  path=self._main_script,
1313  details="The main integrative modeling script")
1314  self.system.locations.append(loc)
1315 
1316  self._states = {}
1317  self.__asym_states = {}
1318  self._working_directory = os.getcwd()
1319  self.default_representation = self.create_representation(
1320  "Default representation")
1321  self.entities = _EntityMapper(self.system)
1322  # Mapping from component names to ihm.AsymUnit
1323  self.asym_units = {}
1324  self._all_components = {}
1325  self.all_modeled_components = []
1326  self._transformed_components = []
1327  self.sequence_dict = {}
1328 
1329  # Common geometric objects used in PMI systems
1330  self._xy_plane = ihm.geometry.XYPlane()
1331  self._xz_plane = ihm.geometry.XZPlane()
1332  self._z_axis = ihm.geometry.ZAxis()
1333  self._center_origin = ihm.geometry.Center(0, 0, 0)
1334  self._identity_transform = ihm.geometry.Transformation.identity()
1335 
1336  # Coordinates to exclude
1337  self._exclude_coords = {}
1338 
1339  self.all_representations = _AllModelRepresentations(self)
1340  self.all_protocols = _AllProtocols(self)
1341  self.all_datasets = _AllDatasets(self.system)
1342  self.all_starting_models = _AllStartingModels(self)
1343 
1344  self.all_software = _AllSoftware(self.system)
1345 
1346  def create_representation(self, name):
1347  """Create a new Representation and return it. This can be
1348  passed to add_model(), add_bead_element() or add_pdb_element()."""
1349  r = ihm.representation.Representation(name=name)
1350  self.system.orphan_representations.append(r)
1351  return r
1352 
1353  def exclude_coordinates(self, component, seqrange):
1354  """Don't record coordinates for the given domain.
1355  Coordinates for the given domain (specified by a component name
1356  and a 2-element tuple giving the start and end residue numbers)
1357  will be excluded from the mmCIF file. This can be used to exclude
1358  parts of the structure that weren't well resolved in modeling.
1359  Any bead or residue that lies wholly within this range will be
1360  excluded. Multiple ranges for a given component can be excluded
1361  by calling this method multiple times."""
1362  if component not in self._exclude_coords:
1363  self._exclude_coords[component] = []
1364  self._exclude_coords[component].append(seqrange)
1365 
1366  def _is_excluded(self, component, start, end):
1367  """Return True iff this chunk of sequence should be excluded"""
1368  for seqrange in self._exclude_coords.get(component, ()):
1369  if start >= seqrange[0] and end <= seqrange[1]:
1370  return True
1371 
1372  def _add_state(self, state):
1373  """Create a new state and return a pointer to it."""
1374  self._state_ensemble_offset = len(self.system.ensembles)
1375  s = _State(state, self)
1376  self._state_group.append(s)
1377  self._last_state = s
1378  return s
1379 
1380  def _get_chain_for_component(self, name, output):
1381  """Get the chain ID for a component, if any."""
1382  # todo: handle multiple copies
1383  if name in self.asym_units:
1384  return self.asym_units[name]._id
1385  else:
1386  # A non-modeled component doesn't have a chain ID
1387  return None
1388 
1389  def _get_assembly_comps(self, assembly):
1390  """Get the names of the components in the given assembly"""
1391  # component name = asym_unit.details
1392  comps = {}
1393  for ca in assembly:
1394  comps[ca.details] = None
1395  return comps
1396 
1397  def create_transformed_component(self, state, name, original, transform):
1398  """Make a new component that's a transformed copy of another.
1399  All representation for the existing component is copied to the
1400  new one."""
1401  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1402  if name in assembly_comps:
1403  raise ValueError("Component %s already exists" % name)
1404  elif original not in assembly_comps:
1405  raise ValueError("Original component %s does not exist" % original)
1406  self.create_component(state, name, True)
1407  self.add_component_sequence(state, name, self.sequence_dict[original])
1408  self._transformed_components.append(_TransformedComponent(
1409  name, original, transform))
1410  self.all_representations.copy_component(state, name, original,
1411  self.asym_units[name])
1412 
1413  def create_component(self, state, name, modeled, asym_name=None):
1414  if asym_name is None:
1415  asym_name = name
1416  new_comp = name not in self._all_components
1417  self._all_components[name] = None
1418  if modeled:
1419  state.all_modeled_components.append(name)
1420  if asym_name not in self.asym_units:
1421  # assign asym once we get sequence
1422  self.asym_units[asym_name] = None
1423  if new_comp:
1424  self.all_modeled_components.append(name)
1425 
1426  def add_component_sequence(self, state, name, seq, asym_name=None,
1427  alphabet=None, uniprot=None):
1428  if asym_name is None:
1429  asym_name = name
1430 
1431  if name in self.sequence_dict:
1432  if self.sequence_dict[name] != seq:
1433  raise ValueError("Sequence mismatch for component %s" % name)
1434  else:
1435  self.sequence_dict[name] = seq
1436  # Offset is always zero to start with; this may be modified
1437  # later in finalize_build() if any non-modeled N-terminal
1438  # residues are removed
1439  self.entities.add(name, seq, 0, alphabet, uniprot)
1440  if asym_name in self.asym_units:
1441  if self.asym_units[asym_name] is None:
1442  # Set up a new asymmetric unit for this component
1443  entity = self.entities[name]
1444  asym = AsymUnit(entity, details=asym_name)
1445  self.system.asym_units.append(asym)
1446  self.asym_units[asym_name] = asym
1447  state.modeled_assembly.append(self.asym_units[asym_name])
1448 
1449  def finalize_build(self):
1450  """Called immediately after the PMI system is built"""
1451  for entity in self.system.entities:
1452  _trim_unrep_termini(entity, self.system.asym_units,
1453  self.system.orphan_representations)
1454 
1455  def _add_restraint_model_fits(self):
1456  """Add fits to restraints for all known models"""
1457  for group, m in self.system._all_models():
1458  if m._is_restrained:
1459  for r in self.system.restraints:
1460  if hasattr(r, 'add_fits_from_model_statfile'):
1461  r.add_fits_from_model_statfile(m)
1462 
1463  def finalize(self):
1464  """Do any final processing on the class hierarchy.
1465  After calling this method, the `system` member (an instance
1466  of `ihm.System`) completely reproduces the PMI modeling, and
1467  can be written out to an mmCIF file with `ihm.dumper.write`,
1468  and/or modified using the ihm API."""
1469  self._add_restraint_model_fits()
1470 
1471  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1472  chain, hier, representation=None):
1473  if self._is_excluded(name, start, end):
1474  return
1475  if representation is None:
1476  representation = self.default_representation
1477  asym = self.asym_units[name]
1478  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1479  hier, asym)
1480  self.all_representations.add_fragment(state, representation, p)
1481  self.all_starting_models.add_pdb_fragment(p)
1482 
1483  def add_bead_element(self, state, name, start, end, num, hier,
1484  representation=None):
1485  if self._is_excluded(name, start, end):
1486  return
1487  if representation is None:
1488  representation = self.default_representation
1489  asym = self.asym_units[name]
1490  pmi_offset = asym.entity.pmi_offset
1491  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1492  num, hier, asym)
1493  self.all_representations.add_fragment(state, representation, b)
1494 
1495  def get_cross_link_group(self, pmi_restraint):
1496  r = _CrossLinkRestraint(pmi_restraint)
1497  self.system.restraints.append(r)
1498  self._add_restraint_dataset(r) # so that all-dataset group works
1499  return r
1500 
1501  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1502  if c1 not in self._all_components or c2 not in self._all_components:
1503  # Crosslink refers to a component we didn't model
1504  # As a quick hack, just ignore it.
1505  # todo: need to add an entity for this anyway (so will need the
1506  # sequence, and add to struct_assembly)
1507  return None
1508  e1 = self.entities[c1]
1509  e2 = self.entities[c2]
1510  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1511  residue2=e2.pmi_residue(r2))
1512  rsr.experimental_cross_links.append([xl])
1513  return xl
1514 
1515  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1516  psi_p, rsr):
1517  # Map p1/p2 to asym units
1518  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1519  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1520 
1521  if _get_by_residue(p1) and _get_by_residue(p2):
1522  cls = _ResidueCrossLink
1523  else:
1524  cls = _FeatureCrossLink
1525  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1526  restrain_all=False)
1527  # Needed to look up fits later
1528  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1529  rsr.cross_links.append(xl)
1530 
1531  def add_replica_exchange(self, state, rex):
1532  # todo: allow for metadata to say how many replicas were used in the
1533  # actual experiment, and how many independent runs were carried out
1534  # (use these as multipliers to get the correct total number of
1535  # output models)
1536  step = _ReplicaExchangeProtocolStep(state, rex)
1537  step.software = self.all_software.pmi
1538  self.all_protocols.add_step(step, state)
1539 
1540  def _add_simple_dynamics(self, num_models_end, method):
1541  # Always assumed that we're dealing with the last state
1542  state = self._last_state
1543  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1544  method), state)
1545 
1546  def _add_protocol(self):
1547  # Always assumed that we're dealing with the last state
1548  state = self._last_state
1549  self.all_protocols.add_protocol(state)
1550 
1551  def _add_dataset(self, dataset):
1552  return self.all_datasets.add(self._last_state, dataset)
1553 
1554  def _add_restraint_dataset(self, restraint):
1555  return self.all_datasets.add_restraint(self._last_state, restraint)
1556 
1557  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1558  # Always assumed that we're dealing with the last state
1559  state = self._last_state
1560  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1561  self.all_protocols.add_postproc(pp, state)
1562  return pp
1563 
1564  def _add_no_postprocessing(self, num_models):
1565  # Always assumed that we're dealing with the last state
1566  state = self._last_state
1567  pp = ihm.analysis.EmptyStep()
1568  pp.num_models_begin = pp.num_models_end = num_models
1569  self.all_protocols.add_postproc(pp, state)
1570  return pp
1571 
1572  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1573  num_models_deposited, localization_densities,
1574  ensemble_file):
1575  """Add an ensemble generated by ad hoc methods (not using PMI).
1576  This is currently only used by the Nup84 system."""
1577  # Always assumed that we're dealing with the last state
1578  state = self._last_state
1579  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1580  state.add_model_group(group)
1581  if ensemble_file:
1582  self.system.locations.append(ensemble_file)
1583  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1584  ensemble_file)
1585  self.system.ensembles.append(e)
1586  for c in state.all_modeled_components:
1587  den = localization_densities.get(c, None)
1588  if den:
1589  e.load_localization_density(state, c, self.asym_units[c], den)
1590  return e
1591 
1592  def set_ensemble_file(self, i, location):
1593  """Point a previously-created ensemble to an 'all-models' file.
1594  This could be a trajectory such as DCD, an RMF, or a multimodel
1595  PDB file."""
1596  self.system.locations.append(location)
1597  # Ensure that we point to an ensemble related to the current state
1598  ind = i + self._state_ensemble_offset
1599  self.system.ensembles[ind].file = location
1600 
1601  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1602  # todo: add prefilter as an additional postprocess step (complication:
1603  # we don't know how many models it filtered)
1604  # todo: postpone rmsf/density localization extraction until after
1605  # relevant methods are called (currently it is done from the
1606  # constructor)
1607  protocol = self.all_protocols.get_last_protocol(state)
1608  num_models = protocol.steps[-1].num_models_end
1609  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1610  pp.software = self.all_software.pmi
1611  self.all_protocols.add_postproc(pp, state)
1612  for i in range(rex._number_of_clusters):
1613  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1614  'cluster %d' % (i + 1)))
1615  state.add_model_group(group)
1616  # todo: make # of models to deposit configurable somewhere
1617  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1618  self.system.ensembles.append(e)
1619  # Add localization density info if available
1620  for fname, stuple in sorted(density_custom_ranges.items()):
1621  e.load_localization_density(state, fname, stuple,
1622  self.asym_units)
1623  for stats in e.load_all_models(self, state):
1624  m = self.add_model(group)
1625  # Since we currently only deposit 1 model, it is the
1626  # best scoring one
1627  m.name = 'Best scoring model'
1628  m.stats = stats
1629  # Add RMSF info if available
1630  for c in state.all_modeled_components:
1631  e.load_rmsf(m, c)
1632 
1633  def _get_subassembly(self, comps, name, description):
1634  """Get an Assembly consisting of the given components.
1635  `compdict` is a dictionary of the components to add, where keys
1636  are the component names and values are the sequence ranges (or
1637  None to use all residues in the component)."""
1638  asyms = []
1639  for comp, seqrng in comps.items():
1640  a = self.asym_units[comp]
1641  asyms.append(a if seqrng is None else a(*seqrng))
1642 
1643  a = ihm.Assembly(asyms, name=name, description=description)
1644  return a
1645 
1646  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1647  details):
1648  """Add a basic FoXS fit. This is largely intended for use from the
1649  NPC application."""
1650  assembly = self._get_subassembly(
1651  {comp: seqrange},
1652  name="SAXS subassembly",
1653  description="All components that fit SAXS data")
1654  r = ihm.restraint.SASRestraint(
1655  dataset, assembly, segment=False,
1656  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1657  multi_state=False, radius_of_gyration=rg, details=details)
1658  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1659  self.system.restraints.append(r)
1660  self._add_restraint_dataset(r) # so that all-dataset group works
1661 
1662  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1663  image_resolution, projection_number,
1664  micrographs_number):
1665  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1666  image_resolution, projection_number,
1667  micrographs_number)
1668  self.system.restraints.append(r)
1669  self._add_restraint_dataset(r) # so that all-dataset group works
1670 
1671  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1672  # todo: need to set allow_duplicates on this dataset?
1673  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1674  self.system.restraints.append(r)
1675  self._add_restraint_dataset(r) # so that all-dataset group works
1676 
1677  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1678  sigma, pmi_restraint):
1679  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1680  sigma, pmi_restraint, self._xy_plane)
1681 
1682  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1683  sigma, pmi_restraint):
1684  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1685  sigma, pmi_restraint, self._xz_plane)
1686 
1687  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1688  sigma, pmi_restraint):
1689  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1690  sigma, pmi_restraint, self._z_axis)
1691 
1692  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1693  sigma, pmi_restraint, geom):
1694  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1695  r = _GeometricRestraint(
1696  self, state, pmi_restraint, geom, asym.get_feature(ps),
1697  ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1698  upper_bound),
1699  sigma)
1700  self.system.restraints.append(r)
1701  self._add_restraint_dataset(r) # so that all-dataset group works
1702 
1703  def _get_membrane(self, tor_R, tor_r, tor_th):
1704  """Get an object representing a half-torus membrane"""
1705  if not hasattr(self, '_seen_membranes'):
1706  self._seen_membranes = {}
1707  # If a membrane already exists with all dimensions within 0.01 of
1708  # this one, reuse it
1709  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1710  if membrane_id not in self._seen_membranes:
1711  m = ihm.geometry.HalfTorus(
1712  center=self._center_origin,
1713  transformation=self._identity_transform,
1714  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1715  inner=True, name='Membrane')
1716  self._seen_membranes[membrane_id] = m
1717  return self._seen_membranes[membrane_id]
1718 
1719  def add_membrane_surface_location_restraint(
1720  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1721  self._add_membrane_restraint(
1722  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1723  ihm.restraint.UpperBoundDistanceRestraint(0.))
1724 
1725  def add_membrane_exclusion_restraint(
1726  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1727  self._add_membrane_restraint(
1728  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1729  ihm.restraint.LowerBoundDistanceRestraint(0.))
1730 
1731  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1732  sigma, pmi_restraint, rsr):
1733  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1734  r = _GeometricRestraint(
1735  self, state, pmi_restraint,
1736  self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1737  rsr, sigma)
1738  self.system.restraints.append(r)
1739  self._add_restraint_dataset(r) # so that all-dataset group works
1740 
1741  def add_model(self, group, assembly=None, representation=None):
1742  state = self._last_state
1743  if representation is None:
1744  representation = self.default_representation
1745  protocol = self.all_protocols.get_last_protocol(state)
1746  m = _Model(state.prot, self, protocol,
1747  assembly if assembly else state.modeled_assembly,
1748  representation)
1749  group.append(m)
1750  return m
1751 
1752 
1754  """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1755  This returns a list of custom dumpers that can be passed as all or
1756  part of the `dumpers` argument to ihm.dumper.write(). They add
1757  PMI-specific information to mmCIF or BinaryCIF files written out
1758  by python-ihm."""
1759  return [_ReplicaExchangeProtocolDumper]
1760 
1761 
1763  """Get custom python-ihm handlers for reading PMI data from mmCIF.
1764  This returns a list of custom handlers that can be passed as all or
1765  part of the `handlers` argument to ihm.reader.read(). They read
1766  PMI-specific information from mmCIF or BinaryCIF files read in
1767  by python-ihm."""
1768  return [_ReplicaExchangeProtocolHandler]
1769 
1770 
1771 class GMMParser(ihm.metadata.Parser):
1772  """Extract metadata from an EM density GMM file."""
1773 
1774  def parse_file(self, filename):
1775  """Extract metadata from `filename`.
1776  @return a dict with key `dataset` pointing to the GMM file and
1777  `number_of_gaussians` to the number of GMMs (or None)"""
1778  loc = ihm.location.InputFileLocation(
1779  filename,
1780  details="Electron microscopy density map, "
1781  "represented as a Gaussian Mixture Model (GMM)")
1782  # A 3DEM restraint's dataset ID uniquely defines the mmCIF
1783  # restraint, so we need to allow duplicates
1784  loc._allow_duplicates = True
1785  d = ihm.dataset.EMDensityDataset(loc)
1786  ret = {'dataset': d, 'number_of_gaussians': None}
1787 
1788  with open(filename) as fh:
1789  for line in fh:
1790  if line.startswith('# data_fn: '):
1791  p = ihm.metadata.MRCParser()
1792  fn = line[11:].rstrip('\r\n')
1793  dataset = p.parse_file(os.path.join(
1794  os.path.dirname(filename), fn))['dataset']
1795  ret['dataset'].parents.append(dataset)
1796  elif line.startswith('# ncenters: '):
1797  ret['number_of_gaussians'] = int(line[12:])
1798  return ret
1799 
1800 
1801 def _trim_unrep_termini(entity, asyms, representations):
1802  """Trim Entity sequence to only cover represented residues.
1803 
1804  PDB policy is for amino acid Entity sequences to be polymers (so
1805  they should include any loops or other gaps in the midst of the
1806  sequence) but for the termini to be trimmed of any not-modeled
1807  residues. Here, we modify the Entity sequence to remove any parts
1808  that are not included in any representation. This may change the
1809  numbering if any N-terminal residues are removed, and thus the offset
1810  between PMI and IHM numbering, as both count from 1."""
1811  rep_range = None
1812  for rep in representations:
1813  for seg in rep:
1814  if seg.asym_unit.entity is entity:
1815  seg_range = seg.asym_unit.seq_id_range
1816  if rep_range is None:
1817  rep_range = list(seg_range)
1818  else:
1819  rep_range[0] = min(rep_range[0], seg_range[0])
1820  rep_range[1] = max(rep_range[1], seg_range[1])
1821  # Do nothing if no representations or if everything is represented
1822  if rep_range is None or rep_range == [1, len(entity.sequence)]:
1823  return
1824  # Update offset (= number of unrepresented N terminal entity residues)
1825  # in entity and all asyms
1826  # Force offset to be a real Python int (not numpy.int64) as python-ihm
1827  # up to version 1.8 requires auth_seq_id_map to be int.
1828  pmi_offset = int(rep_range[0]) - 1
1829  entity.pmi_offset = pmi_offset
1830  for asym in asyms:
1831  if asym.entity is entity:
1832  asym.auth_seq_id_map = entity.pmi_offset
1833  # Modify entity sequence to only include represented residues
1834  entity.sequence = entity.sequence[rep_range[0] - 1:rep_range[1]]
1835 
1836  # If N terminal deletions, we must fix the numbering of all segments
1837  # and starting models
1838  if pmi_offset != 0:
1839  for rep in representations:
1840  for seg in rep:
1841  if seg.asym_unit.entity is entity:
1842  seg_range = seg.asym_unit.seq_id_range
1843  seg.asym_unit.seq_id_range = (seg_range[0] - pmi_offset,
1844  seg_range[1] - pmi_offset)
1845  if seg.starting_model:
1846  model = seg.starting_model
1847  seg_range = model.asym_unit.seq_id_range
1848  model.asym_unit.seq_id_range = \
1849  (seg_range[0] - pmi_offset,
1850  seg_range[1] - pmi_offset)
1851  model.offset = model.offset - pmi_offset
Select non water and non hydrogen atoms.
Definition: pdb.h:314
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
Definition: mmcif.py:1762
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1346
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1397
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1356
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
def add_uniprot_reference
Add UniProt accession (if available) to the IHM system.
Definition: mmcif.py:1232
def finalize_build
Called immediately after the PMI system is built.
Definition: mmcif.py:1449
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1297
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1775
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1221
Miscellaneous utilities.
Definition: tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1771
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:749
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1267
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1251
A single entity in the system.
Definition: mmcif.py:1188
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1592
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1271
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1463
Base class for capturing a modeling protocol.
Definition: output.py:41
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Definition: mmcif.py:1753
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
3D rotation class.
Definition: Rotation3D.h:52
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1217
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...