IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 import copy
13 import RMF
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
19 import IMP.pmi.tools
20 import IMP.pmi.alphabets
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import weakref
29 import ihm.location
30 import ihm.dataset
31 import ihm.dumper
32 import ihm.reader
33 import ihm.metadata
34 import ihm.startmodel
35 import ihm.model
36 import ihm.protocol
37 import ihm.analysis
38 import ihm.representation
39 import ihm.geometry
40 import ihm.cross_linkers
41 
42 
43 def _assign_id(obj, seen_objs, obj_by_id):
44  """Assign a unique ID to obj, and track all ids in obj_by_id."""
45  if obj not in seen_objs:
46  if not hasattr(obj, 'id'):
47  obj_by_id.append(obj)
48  obj.id = len(obj_by_id)
49  seen_objs[obj] = obj.id
50  else:
51  obj.id = seen_objs[obj]
52 
53 
54 def _get_by_residue(p):
55  """Determine whether the given particle represents a specific residue
56  or a more coarse-grained object."""
58 
59 
60 class _ComponentMapper:
61  """Map a Particle to a component name"""
62  def __init__(self, prot):
63  self.o = IMP.pmi.output.Output()
64  self.prot = prot
65  self.name = 'cif-output'
66  self.o.dictionary_pdbs[self.name] = self.prot
67  self.o._init_dictchain(self.name, self.prot,
68  multichar_chain=True, mmcif=True)
69 
70  def __getitem__(self, p):
71  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
72  return protname
73 
74 
75 class _AsymMapper:
76  """Map a Particle to an asym_unit"""
77  def __init__(self, simo, prot):
78  self.simo = simo
79  self._cm = _ComponentMapper(prot)
80  self._seen_ranges = {}
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rng = asym(rind, rind)
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rng = asym(rinds[0], rinds[-1])
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  # Join contiguous ranges
103  if len(rngs) > 0 and rngs[-1].asym == asym \
104  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
105  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
106  rng.seq_id_range[1])
107  else:
108  rngs.append(rng)
109  # If an identical feature already exists, return that
110  # todo: python-ihm should handle this automatically for us
111  hrngs = tuple(rngs)
112  if hrngs in self._seen_ranges:
113  return self._seen_ranges[hrngs]
114  else:
115  feat = ihm.restraint.ResidueFeature(rngs)
116  self._seen_ranges[hrngs] = feat
117  return feat
118 
119 
120 class _AllSoftware:
121  def __init__(self, system):
122  self.system = system
123  self.modeller_used = self.phyre2_used = False
124  self.pmi = ihm.Software(
125  name="IMP PMI module",
126  version=IMP.pmi.__version__,
127  classification="integrative model building",
128  description="integrative model building",
129  location='https://integrativemodeling.org')
130  self.imp = ihm.Software(
131  name="Integrative Modeling Platform (IMP)",
132  version=IMP.__version__,
133  classification="integrative model building",
134  description="integrative model building",
135  location='https://integrativemodeling.org')
136  # Only recent versions of python-ihm support adding citations for
137  # software
138  if hasattr(self.imp, 'citation'):
139  javi = 'Vel\u00e1zquez-Muriel J'
140  self.imp.citation = ihm.Citation(
141  pmid='22272186',
142  title='Putting the pieces together: integrative modeling '
143  'platform software for structure determination of '
144  'macromolecular assemblies',
145  journal='PLoS Biol', volume=10, page_range='e1001244',
146  year=2012,
147  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
148  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
149  doi='10.1371/journal.pbio.1001244')
150  self.pmi.citation = ihm.Citation(
151  pmid='31396911',
152  title='Modeling Biological Complexes Using Integrative '
153  'Modeling Platform.',
154  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
155  year=2019,
156  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
157  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
158  'Sali A'],
159  doi='10.1007/978-1-4939-9608-7_15')
160  self.system.software.extend([self.pmi, self.imp])
161 
162  def set_modeller_used(self, version, date):
163  if self.modeller_used:
164  return
165  self.modeller_used = True
166  s = ihm.Software(
167  name='MODELLER', classification='comparative modeling',
168  description='Comparative modeling by satisfaction '
169  'of spatial restraints, build ' + date,
170  location='https://salilab.org/modeller/', version=version)
171  self.system.software.append(s)
172  if hasattr(s, 'citation'):
173  s.citation = ihm.Citation(
174  pmid='8254673',
175  title='Comparative protein modelling by satisfaction of '
176  'spatial restraints.',
177  journal='J Mol Biol', volume=234, page_range=(779, 815),
178  year=1993, authors=['Sali A', 'Blundell TL'],
179  doi='10.1006/jmbi.1993.1626')
180 
181  def set_phyre2_used(self):
182  if self.phyre2_used:
183  return
184  self.phyre2_used = True
185  s = ihm.Software(
186  name='Phyre2', classification='protein homology modeling',
187  description='Protein Homology/analogY Recognition Engine V 2.0',
188  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
189  if hasattr(s, 'citation'):
190  s.citation = ihm.Citation(
191  pmid='25950237',
192  title='The Phyre2 web portal for protein modeling, '
193  'prediction and analysis.',
194  journal='Nat Protoc', volume=10, page_range=(845, 858),
195  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
196  'Sternberg MJ'],
197  year=2015, doi='10.1038/nprot.2015.053')
198  self.system.software.append(s)
199 
200 
201 def _get_fragment_is_rigid(fragment):
202  """Determine whether a fragment is modeled rigidly"""
203  leaves = IMP.atom.get_leaves(fragment.hier)
204  # Assume all leaves are set up as rigid/flexible in the same way
205  return IMP.core.RigidMember.get_is_setup(leaves[0])
206 
207 
208 class _PDBFragment(ihm.representation.ResidueSegment):
209  """Record details about part of a PDB file used as input
210  for a component."""
211  def __init__(self, state, component, start, end, pdb_offset,
212  pdbname, chain, hier, asym_unit):
213  # start, end are PMI residue indexes (not IHM)
214  super().__init__(
215  asym_unit=asym_unit.pmi_range(start, end),
216  rigid=None, primitive='sphere')
217  self.component, self.start, self.end, self.offset, self.pdbname \
218  = component, start, end, pdb_offset, pdbname
219  self.state, self.chain, self.hier = state, chain, hier
221  & IMP.atom.ChainPDBSelector([chain])
222  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
223 
224  rigid = property(lambda self: _get_fragment_is_rigid(self),
225  lambda self, val: None)
226 
227  def combine(self, other):
228  pass
229 
230 
231 class _BeadsFragment(ihm.representation.FeatureSegment):
232  """Record details about beads used to represent part of a component."""
233  chain = None
234 
235  def __init__(self, state, component, start, end, count, hier, asym_unit):
236  super().__init__(
237  asym_unit=asym_unit(start, end), rigid=None, primitive='sphere',
238  count=count)
239  self.state, self.component, self.hier = state, component, hier
240 
241  rigid = property(lambda self: _get_fragment_is_rigid(self),
242  lambda self, val: None)
243 
244  def combine(self, other):
245  # todo: don't combine if one fragment is rigid and the other flexible
246  if (type(other) == type(self) and # noqa: E721
247  other.asym_unit.seq_id_range[0]
248  == self.asym_unit.seq_id_range[1] + 1):
249  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
250  other.asym_unit.seq_id_range[1])
251  self.count += other.count
252  return True
253 
254 
255 class _AllModelRepresentations:
256  def __init__(self, simo):
257  self.simo = simo
258  # dict of fragments, ordered by representation, then component name,
259  # then state
260  self.fragments = OrderedDict()
261  self._all_representations = {}
262 
263  def copy_component(self, state, name, original, asym_unit):
264  """Copy all representation for `original` in `state` to `name`"""
265  def copy_frag(f):
266  newf = copy.copy(f)
267  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
268  return newf
269  for rep in self.fragments:
270  if original in self.fragments[rep]:
271  if name not in self.fragments[rep]:
272  self.fragments[rep][name] = OrderedDict()
273  self.fragments[rep][name][state] = [
274  copy_frag(f) for f in self.fragments[rep][original][state]]
275  # Assume representation for a component is the same in all
276  # states, so only write out the first one
277  first_state = list(self.fragments[rep][name].keys())[0]
278  if state is first_state:
279  representation = self._all_representations[rep]
280  representation.extend(self.fragments[rep][name][state])
281 
282  def add_fragment(self, state, representation, fragment):
283  """Add a model fragment."""
284  comp = fragment.component
285  id_rep = id(representation)
286  self._all_representations[id_rep] = representation
287  if id_rep not in self.fragments:
288  self.fragments[id_rep] = OrderedDict()
289  if comp not in self.fragments[id_rep]:
290  self.fragments[id_rep][comp] = OrderedDict()
291  if state not in self.fragments[id_rep][comp]:
292  self.fragments[id_rep][comp][state] = []
293  fragments = self.fragments[id_rep][comp][state]
294  if len(fragments) == 0 or not fragments[-1].combine(fragment):
295  fragments.append(fragment)
296  # Assume representation for a component is the same in all states,
297  # so only write out the first one
298  first_state = list(self.fragments[id_rep][comp].keys())[0]
299  if state is first_state:
300  representation.append(fragment)
301 
302 
303 class _AllDatasets:
304  """Track all datasets generated by PMI and add them to the ihm.System"""
305  def __init__(self, system):
306  self.system = system
307  self._datasets = []
308  self._datasets_by_state = {}
309  self._restraints_by_state = {}
310 
311  def get_all_group(self, state):
312  """Get a DatasetGroup encompassing all datasets so far in this state"""
313  # Note that if a restraint dataset is replaced after this is called,
314  # the group will still contain the old dataset - mark dataset as read
315  # only?
316  g = ihm.dataset.DatasetGroup(
317  self._datasets_by_state.get(state, [])
318  + [r.dataset for r in self._restraints_by_state.get(state, [])
319  if r.dataset])
320  return g
321 
322  def add(self, state, dataset):
323  """Add a new dataset."""
324  self._datasets.append(dataset)
325  if state not in self._datasets_by_state:
326  self._datasets_by_state[state] = []
327  self._datasets_by_state[state].append(dataset)
328  # Make sure that the dataset ends up in the mmCIF file even if nothing
329  # refers to it
330  self.system.orphan_datasets.append(dataset)
331  return dataset
332 
333  def add_restraint(self, state, restraint):
334  """Add the dataset for a restraint"""
335  if state not in self._restraints_by_state:
336  self._restraints_by_state[state] = []
337  self._restraints_by_state[state].append(restraint)
338 
339 
340 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
341  """Restrain to a set of cross-links"""
342 
343  assembly = None
344  _label_map = {'wtDSS': 'DSS', 'scDSS': 'DSS', 'scEDC': 'EDC'}
345  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
346  'EDC': ihm.cross_linkers.edc}
347 
348  def __init__(self, pmi_restraint):
349  self.pmi_restraint = pmi_restraint
350  # Use ChemDescriptor from PMI restraint if available, otherwise guess
351  # it using the label
352  linker = getattr(self.pmi_restraint, 'linker', None)
353  label = self.pmi_restraint.label
354  self.label = label
355  super().__init__(
356  dataset=self.pmi_restraint.dataset,
357  linker=linker or self._get_chem_descriptor(label))
358 
359  @classmethod
360  def _get_chem_descriptor(cls, label):
361  # Map commonly-used subtypes to more standard labels
362  label = cls._label_map.get(label, label)
363  if label not in cls._descriptor_map:
364  # If label is not a standard linker type, make a new chemical
365  # descriptor containing just the name. We don't know the chemistry
366  # so cannot specify a SMILES or INCHI string for it at this point
367  d = ihm.ChemDescriptor(label)
368  cls._descriptor_map[label] = d
369  return cls._descriptor_map[label]
370 
371  def _set_psi_sigma(self, model):
372  old_values = []
373  # Do nothing if given a different state than the restraint applies to
374  if model.m != self.pmi_restraint.model:
375  return
376  for resolution in self.pmi_restraint.sigma_dictionary:
377  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
378  if model.stats and statname in model.stats:
379  sigma = float(model.stats[statname])
380  p = self.pmi_restraint.sigma_dictionary[resolution][0]
381  old_values.append((p, p.get_scale()))
382  p.set_scale(sigma)
383  for psiindex in self.pmi_restraint.psi_dictionary:
384  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
385  if model.stats and statname in model.stats:
386  psi = float(model.stats[statname])
387  p = self.pmi_restraint.psi_dictionary[psiindex][0]
388  old_values.append((p, p.get_scale()))
389  p.set_scale(psi)
390  # If the same particle is set multiple times, make sure we get the
391  # original value last
392  return list(reversed(old_values))
393 
394  def add_fits_from_model_statfile(self, model):
395  # Set psi/sigma for all particles
396  old_values = self._set_psi_sigma(model)
397  # If no stats were collected, we can't show the fit
398  if not old_values:
399  return
400  for xl in self.cross_links:
401  # Get current psi/sigma particle value for each XL
402  xl.fits[model] = ihm.restraint.CrossLinkFit(
403  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
404  # Restore original psi/sigma
405  for p, scale in old_values:
406  p.set_scale(scale)
407 
408  # Have our dataset point to that in the original PMI restraint
409  def __set_dataset(self, val):
410  self.pmi_restraint.dataset = val
411  dataset = property(lambda self: self.pmi_restraint.dataset,
412  __set_dataset)
413 
414 
415 def get_asym_mapper_for_state(simo, state, asym_map):
416  asym = asym_map.get(state, None)
417  if asym is None:
418  asym = _AsymMapper(simo, state.prot)
419  asym_map[state] = asym
420  return asym
421 
422 
423 class _PMICrossLink:
424  # Query PMI particles to get psi and sigma values at time of use, not
425  # time of construction (since they may be set after creating the restraint)
426  psi = property(lambda self: self.psi_p.get_scale(),
427  lambda self, val: None)
428  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
429  lambda self, val: None)
430  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
431  lambda self, val: None)
432 
433 
434 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
435  pass
436 
437 
438 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
439  pass
440 
441 
442 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
443  def __init__(self, state, pmi_restraint, image_number, resolution,
444  pixel_size, image_resolution, projection_number,
445  micrographs_number):
446  self.pmi_restraint, self.image_number = pmi_restraint, image_number
447  super().__init__(
448  dataset=pmi_restraint.datasets[image_number],
449  assembly=state.modeled_assembly,
450  segment=False, number_raw_micrographs=micrographs_number,
451  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
452  image_resolution=image_resolution,
453  number_of_projections=projection_number)
454 
455  # Have our dataset point to that in the original PMI restraint
456  def __get_dataset(self):
457  return self.pmi_restraint.datasets[self.image_number]
458 
459  def __set_dataset(self, val):
460  self.pmi_restraint.datasets[self.image_number] = val
461 
462  dataset = property(__get_dataset, __set_dataset)
463 
464  def add_fits_from_model_statfile(self, model):
465  ccc = self._get_cross_correlation(model)
466  transform = self._get_transformation(model)
467  rot = transform.get_rotation()
468  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
469  self.fits[model] = ihm.restraint.EM2DRestraintFit(
470  cross_correlation_coefficient=ccc,
471  rot_matrix=rm,
472  tr_vector=transform.get_translation())
473 
474  def _get_transformation(self, model):
475  """Get the transformation that places the model on the image"""
476  stats = model.em2d_stats or model.stats
477  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
478  self.image_number + 1)
479  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
480  t = [float(stats[prefix + '_Translation%d' % i])
481  for i in range(3)]
482  # If the model coordinates are transformed, need to back transform
483  # them first
484  inv = model.transform.get_inverse()
486  IMP.algebra.Vector3D(*t)) * inv
487 
488  def _get_cross_correlation(self, model):
489  """Get the cross correlation coefficient between the model projection
490  and the image"""
491  stats = model.em2d_stats or model.stats
492  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
493  % (self.pmi_restraint.label,
494  self.image_number + 1)])
495 
496 
497 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
498 
499  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
500  self.pmi_restraint = pmi_restraint
501  super().__init__(
502  dataset=pmi_restraint.dataset,
503  assembly=self._get_assembly(densities, simo, state),
504  fitting_method='Gaussian mixture models',
505  number_of_gaussians=len(target_ps))
506 
507  # Have our dataset point to that in the original PMI restraint
508  def __set_dataset(self, val):
509  self.pmi_restraint.dataset = val
510  dataset = property(lambda self: self.pmi_restraint.dataset,
511  __set_dataset)
512 
513  def _get_assembly(self, densities, simo, state):
514  """Get the Assembly that this restraint acts on"""
515  cm = _ComponentMapper(state.prot)
516  components = {}
517  for d in densities:
518  components[cm[d]] = None # None == all residues in this component
519  a = simo._get_subassembly(
520  components, name="EM subassembly",
521  description="All components that fit the EM map")
522  return a
523 
524  def add_fits_from_model_statfile(self, model):
525  ccc = self._get_cross_correlation(model)
526  self.fits[model] = ihm.restraint.EM3DRestraintFit(
527  cross_correlation_coefficient=ccc)
528 
529  def _get_cross_correlation(self, model):
530  """Get the cross correlation coefficient between the model
531  and the map"""
532  if model.stats is not None:
533  return float(model.stats['GaussianEMRestraint_%s_CCC'
534  % self.pmi_restraint.label])
535 
536 
537 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
538 
539  def __init__(self, simo, state, pmi_restraint, geometric_object,
540  feature, distance, sigma):
541  self.pmi_restraint = pmi_restraint
542  super().__init__(
543  dataset=pmi_restraint.dataset,
544  geometric_object=geometric_object, feature=feature,
545  distance=distance, harmonic_force_constant=1. / sigma,
546  restrain_all=True)
547 
548  # Have our dataset point to that in the original PMI restraint
549  def __set_dataset(self, val):
550  self.pmi_restraint.dataset = val
551  dataset = property(lambda self: self.pmi_restraint.dataset,
552  __set_dataset)
553 
554 
555 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
556  def __init__(self, state, rex):
557  if rex.monte_carlo_sample_objects is not None:
558  method = 'Replica exchange monte carlo'
559  else:
560  method = 'Replica exchange molecular dynamics'
561  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
562  self.replica_exchange_minimum_temperature = \
563  rex.vars['replica_exchange_minimum_temperature']
564  self.replica_exchange_maximum_temperature = \
565  rex.vars['replica_exchange_maximum_temperature']
566  super().__init__(
567  assembly=state.modeled_assembly,
568  dataset_group=None, # filled in by add_step()
569  method=method, name='Sampling',
570  num_models_begin=None, # filled in by add_step()
571  num_models_end=rex.vars["number_of_frames"],
572  multi_scale=True, multi_state=False, ordered=False, ensemble=True)
573 
574 
575 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
576  """Write IMP-specific information about replica exchange to mmCIF.
577  Note that IDs will have already been assigned by python-ihm's
578  standard modeling protocol dumper."""
579  def dump(self, system, writer):
580  with writer.loop("_imp_replica_exchange_protocol",
581  ["protocol_id", "step_id", "monte_carlo_temperature",
582  "replica_exchange_minimum_temperature",
583  "replica_exchange_maximum_temperature"]) as lp:
584  for p in system._all_protocols():
585  for s in p.steps:
586  if isinstance(s, _ReplicaExchangeProtocolStep):
587  self._dump_step(p, s, lp)
588 
589  def _dump_step(self, p, s, lp):
590  mintemp = s.replica_exchange_minimum_temperature
591  maxtemp = s.replica_exchange_maximum_temperature
592  lp.write(protocol_id=p._id, step_id=s._id,
593  monte_carlo_temperature=s.monte_carlo_temperature,
594  replica_exchange_minimum_temperature=mintemp,
595  replica_exchange_maximum_temperature=maxtemp)
596 
597 
598 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
599  category = '_imp_replica_exchange_protocol'
600 
601  """Read IMP-specific information about replica exchange from mmCIF."""
602  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
603  replica_exchange_minimum_temperature,
604  replica_exchange_maximum_temperature):
605  p = self.sysr.protocols.get_by_id(protocol_id)
606  # todo: match step_id properly (don't assume contiguous)
607  s = p.steps[int(step_id)-1]
608  # Upgrade from plain ihm Step to IMP subclass
609  s.__class__ = _ReplicaExchangeProtocolStep
610  s.monte_carlo_temperature = \
611  self.get_float(monte_carlo_temperature)
612  s.replica_exchange_minimum_temperature = \
613  self.get_float(replica_exchange_minimum_temperature)
614  s.replica_exchange_maximum_temperature = \
615  self.get_float(replica_exchange_maximum_temperature)
616 
617 
618 class _SimpleProtocolStep(ihm.protocol.Step):
619  def __init__(self, state, num_models_end, method):
620  super().__init__(
621  assembly=state.modeled_assembly,
622  dataset_group=None, # filled in by add_step()
623  method=method, name='Sampling',
624  num_models_begin=None, # filled in by add_step()
625  num_models_end=num_models_end,
626  multi_scale=True, multi_state=False, ordered=False,
627  ensemble=True)
628 
629 
630 class _Chain:
631  """Represent a single chain in a Model"""
632  def __init__(self, pmi_chain_id, asym_unit):
633  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
634  self.spheres = []
635  self.atoms = []
636 
637  def add(self, xyz, atom_type, residue_type, residue_index,
638  all_indexes, radius):
639  if atom_type is None:
640  self.spheres.append((xyz, residue_type, residue_index,
641  all_indexes, radius))
642  else:
643  self.atoms.append((xyz, atom_type, residue_type, residue_index,
644  all_indexes, radius))
645  orig_comp = property(lambda self: self.comp)
646 
647 
648 class _TransformedChain:
649  """Represent a chain that is a transformed version of another"""
650  def __init__(self, orig_chain, asym_unit, transform):
651  self.orig_chain, self.asym_unit = orig_chain, asym_unit
652  self.transform = transform
653 
654  def __get_spheres(self):
655  for (xyz, residue_type, residue_index, all_indexes,
656  radius) in self.orig_chain.spheres:
657  yield (self.transform * xyz, residue_type, residue_index,
658  all_indexes, radius)
659  spheres = property(__get_spheres)
660 
661  def __get_atoms(self):
662  for (xyz, atom_type, residue_type, residue_index, all_indexes,
663  radius) in self.orig_chain.atoms:
664  yield (self.transform * xyz, atom_type, residue_type,
665  residue_index, all_indexes, radius)
666  atoms = property(__get_atoms)
667 
668  entity = property(lambda self: self.orig_chain.entity)
669  orig_comp = property(lambda self: self.orig_chain.comp)
670 
671 
672 class _Excluder:
673  def __init__(self, component, simo):
674  self._seqranges = simo._exclude_coords.get(component, [])
675 
676  def is_excluded(self, indexes):
677  """Return True iff the given sequence range is excluded."""
678  for seqrange in self._seqranges:
679  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
680  return True
681 
682 
683 class _Model(ihm.model.Model):
684  def __init__(self, prot, simo, protocol, assembly, representation):
685  super().__init__(assembly=assembly, protocol=protocol,
686  representation=representation)
687  self.simo = weakref.proxy(simo)
688  # Transformation from IMP coordinates into mmCIF coordinate system.
689  # Normally we pass through coordinates unchanged, but in some cases
690  # we may want to translate them (e.g. Nup84, where the deposited PDB
691  # files are all centered; we want the mmCIF files to match)
693  self.em2d_stats = None
694  self.stats = None
695  # True iff restraints act on this model
696  self._is_restrained = True
697  o = self.output = IMP.pmi.output.Output(atomistic=True)
698  name = 'cif-output'
699  self.m = prot.get_model()
700  o.dictionary_pdbs[name] = prot
701  o._init_dictchain(name, prot, multichar_chain=True)
702  (particle_infos_for_pdb,
703  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
704  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
705  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
706  self.rmsf = {}
707 
708  def all_chains(self, simo):
709  """Yield all chains, including transformed ones"""
710  chain_for_comp = {}
711  for c in self.chains:
712  yield c
713  chain_for_comp[c.comp] = c
714  for tc in simo._transformed_components:
715  orig_chain = chain_for_comp.get(tc.original, None)
716  if orig_chain:
717  asym = simo.asym_units[tc.name]
718  c = _TransformedChain(orig_chain, asym, tc.transform)
719  c.comp = tc.name
720  yield c
721 
722  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
723  entity_for_chain = {}
724  comp_for_chain = {}
725  correct_asym = {}
726  for protname, chain_id in o.dictchain[name].items():
727  if protname in simo.entities:
728  entity_for_chain[chain_id] = simo.entities[protname]
729  else:
730  # Remove copy number
731  pn = protname.split('.')[0]
732  entity_for_chain[chain_id] = simo.entities[pn]
733  comp_for_chain[chain_id] = protname
734  # When doing multi-state modeling, the chain ID returned here
735  # (assigned sequentially) might not be correct (states may have
736  # gaps in the chain IDs). Map it to the correct asym unit.
737  correct_asym[chain_id] = simo.asym_units[protname]
738 
739  # Gather by chain ID (should be sorted by chain ID already)
740  self.chains = []
741  chain = None
742  excluder = None
743 
744  for (xyz, atom_type, residue_type, chain_id, residue_index,
745  all_indexes, radius) in particle_infos_for_pdb:
746  if chain is None or chain.pmi_chain_id != chain_id:
747  chain = _Chain(chain_id, correct_asym[chain_id])
748  chain.entity = entity_for_chain[chain_id]
749  chain.comp = comp_for_chain[chain_id]
750  self.chains.append(chain)
751  excluder = _Excluder(chain.comp, simo)
752  if not excluder.is_excluded(all_indexes if all_indexes
753  else [residue_index]):
754  chain.add(xyz, atom_type, residue_type, residue_index,
755  all_indexes, radius)
756 
757  def parse_rmsf_file(self, fname, component):
758  self.rmsf[component] = rmsf = {}
759  with open(str(fname)) as fh:
760  for line in fh:
761  resnum, blocknum, val = line.split()
762  rmsf[int(resnum)] = (int(blocknum), float(val))
763 
764  def get_rmsf(self, component, indexes):
765  """Get the RMSF value for the given residue indexes."""
766  if not self.rmsf:
767  return None
768  rmsf = self.rmsf[component]
769  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
770  if len(blocknums) != 1:
771  raise ValueError("Residue indexes %s aren't all in the same block"
772  % str(indexes))
773  return rmsf[indexes[0]][1]
774 
775  def get_atoms(self):
776  for chain in self.all_chains(self.simo):
777  pmi_offset = chain.asym_unit.entity.pmi_offset
778  for atom in chain.atoms:
779  (xyz, atom_type, residue_type, residue_index,
780  all_indexes, radius) = atom
781  pt = self.transform * xyz
782  yield ihm.model.Atom(
783  asym_unit=chain.asym_unit,
784  seq_id=residue_index - pmi_offset,
785  atom_id=atom_type.get_string(),
786  type_symbol=None, # todo: get element
787  x=pt[0], y=pt[1], z=pt[2])
788 
789  def get_spheres(self):
790  for chain in self.all_chains(self.simo):
791  pmi_offset = chain.asym_unit.entity.pmi_offset
792  for sphere in chain.spheres:
793  (xyz, residue_type, residue_index,
794  all_indexes, radius) = sphere
795  if all_indexes is None:
796  all_indexes = (residue_index,)
797  pt = self.transform * xyz
798  yield ihm.model.Sphere(
799  asym_unit=chain.asym_unit,
800  seq_id_range=(all_indexes[0] - pmi_offset,
801  all_indexes[-1] - pmi_offset),
802  x=pt[0], y=pt[1], z=pt[2], radius=radius,
803  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
804 
805 
806 class _AllProtocols:
807  def __init__(self, simo):
808  self.simo = weakref.proxy(simo)
809  # Lists of Protocols by state
810  self.protocols = OrderedDict()
811 
812  def add_protocol(self, state):
813  """Add a new Protocol"""
814  if state not in self.protocols:
815  self.protocols[state] = []
816  p = ihm.protocol.Protocol()
817  self.simo.system.orphan_protocols.append(p)
818  self.protocols[state].append(p)
819 
820  def add_step(self, step, state):
821  """Add a ProtocolStep to the last Protocol of the given State"""
822  if state not in self.protocols:
823  self.add_protocol(state)
824  protocol = self.get_last_protocol(state)
825  if len(protocol.steps) == 0:
826  step.num_models_begin = 0
827  else:
828  step.num_models_begin = protocol.steps[-1].num_models_end
829  protocol.steps.append(step)
830  step.id = len(protocol.steps)
831  # Assume that protocol uses all currently-defined datasets
832  step.dataset_group = self.simo.all_datasets.get_all_group(state)
833 
834  def add_postproc(self, step, state):
835  """Add a postprocessing step to the last protocol"""
836  protocol = self.get_last_protocol(state)
837  if len(protocol.analyses) == 0:
838  protocol.analyses.append(ihm.analysis.Analysis())
839  protocol.analyses[-1].steps.append(step)
840 
841  def get_last_protocol(self, state):
842  """Return the most recently-added _Protocol"""
843  return self.protocols[state][-1]
844 
845 
846 class _AllStartingModels:
847  def __init__(self, simo):
848  self.simo = simo
849  # dict of starting models (entire PDB files), collected from fragments,
850  # ordered by component name and state
851  self.models = OrderedDict()
852  self.output = IMP.pmi.output.Output()
853 
854  def add_pdb_fragment(self, fragment):
855  """Add a starting model PDB fragment."""
856  comp = fragment.component
857  state = fragment.state
858  if comp not in self.models:
859  self.models[comp] = OrderedDict()
860  if state not in self.models[comp]:
861  self.models[comp][state] = []
862  models = self.models[comp][state]
863  if len(models) == 0 \
864  or models[-1].fragments[0].pdbname != fragment.pdbname:
865  model = self._add_model(fragment)
866  models.append(model)
867  else:
868  # Break circular ref between fragment and model
869  models[-1].fragments.append(weakref.proxy(fragment))
870  # Update residue range to cover all fragments
871  # We don't use fragment.offset here because fragment numbering
872  # is already in seq_id, not PDB numbering
873  pmi_offset = models[-1].asym_unit.entity.pmi_offset
874  sid_begin = min(fragment.start - pmi_offset,
875  models[-1].asym_unit.seq_id_range[0])
876  sid_end = max(fragment.end - pmi_offset,
877  models[-1].asym_unit.seq_id_range[1])
878  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
879  fragment.starting_model = models[-1]
880 
881  def _add_model(self, f):
882  parser = ihm.metadata.PDBParser()
883  r = parser.parse_file(f.pdbname)
884 
885  self.simo._add_dataset(r['dataset'])
886  # We only want the templates that model the starting model chain
887  templates = r['templates'].get(f.chain, [])
888  for t in templates:
889  if t.alignment_file:
890  self.simo.system.locations.append(t.alignment_file)
891  if t.dataset:
892  self.simo._add_dataset(t.dataset)
893  source = r['entity_source'].get(f.chain)
894  if source:
895  f.asym_unit.entity.source = source
896  pmi_offset = f.asym_unit.entity.pmi_offset
897  m = _StartingModel(
898  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
899  dataset=r['dataset'], asym_id=f.chain,
900  templates=templates, offset=f.offset + pmi_offset,
901  metadata=r['metadata'],
902  software=r['software'][0] if r['software'] else None,
903  script_file=r['script'])
904  m.fragments = [weakref.proxy(f)]
905  return m
906 
907 
908 class _StartingModel(ihm.startmodel.StartingModel):
909  def get_seq_dif(self):
910  return self._seq_dif
911 
912  def get_atoms(self):
913  pmi_offset = self.asym_unit.entity.pmi_offset
914  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
915  self.asym_unit)
916  for f in self.fragments:
917  sel = IMP.atom.Selection(
918  f.starting_hier,
919  residue_indexes=list(range(f.start - f.offset,
920  f.end - f.offset + 1)))
921  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
922  f.offset - pmi_offset):
923  yield a
924  self._seq_dif = mh._seq_dif
925 
926 
927 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
928  """Post processing using AnalysisReplicaExchange0 macro"""
929 
930  def __init__(self, rex, num_models_begin):
931  self.rex = rex
932  num_models_end = 0
933  for fname in self.get_all_stat_files():
934  with open(str(fname)) as fh:
935  num_models_end += len(fh.readlines())
936  super().__init__(
937  feature='RMSD', num_models_begin=num_models_begin,
938  num_models_end=num_models_end)
939 
940  def get_stat_file(self, cluster_num):
941  return self.rex._outputdir / ("cluster.%d" % cluster_num) / 'stat.out'
942 
943  def get_all_stat_files(self):
944  for i in range(self.rex._number_of_clusters):
945  yield self.get_stat_file(i)
946 
947 
948 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
949  """Ensemble generated using AnalysisReplicaExchange0 macro"""
950 
951  num_models_deposited = None # Override base class property
952 
953  def __init__(self, pp, cluster_num, model_group, num_deposit):
954  with open(str(pp.get_stat_file(cluster_num))) as fh:
955  num_models = len(fh.readlines())
956  super().__init__(
957  num_models=num_models,
958  model_group=model_group, post_process=pp,
959  clustering_feature=pp.feature,
960  name=model_group.name)
961  self.cluster_num = cluster_num
962  self.num_models_deposited = num_deposit
963 
964  def get_rmsf_file(self, component):
965  return (self.post_process.rex._outputdir
966  / ('cluster.%d' % self.cluster_num)
967  / ('rmsf.%s.dat' % component))
968 
969  def load_rmsf(self, model, component):
970  fname = self.get_rmsf_file(component)
971  if fname.exists():
972  model.parse_rmsf_file(fname, component)
973 
974  def get_localization_density_file(self, fname):
975  return (self.post_process.rex._outputdir
976  / ('cluster.%d' % self.cluster_num)
977  / ('%s.mrc' % fname))
978 
979  def load_localization_density(self, state, fname, select_tuple,
980  asym_units):
981  fullpath = self.get_localization_density_file(fname)
982  if fullpath.exists():
983  details = "Localization density for %s %s" \
984  % (fname, self.model_group.name)
985  local_file = ihm.location.OutputFileLocation(str(fullpath),
986  details=details)
987  for s in select_tuple:
988  if isinstance(s, tuple) and len(s) == 3:
989  asym = asym_units[s[2]].pmi_range(s[0], s[1])
990  else:
991  asym = asym_units[s]
992  den = ihm.model.LocalizationDensity(file=local_file,
993  asym_unit=asym)
994  self.densities.append(den)
995 
996  def load_all_models(self, simo, state):
997  stat_fname = self.post_process.get_stat_file(self.cluster_num)
998  model_num = 0
999  with open(str(stat_fname)) as fh:
1000  stats = ast.literal_eval(fh.readline())
1001  # Correct path
1002  rmf_file = stat_fname.parent / ("%d.rmf3" % model_num)
1003  # todo: test with real PMI2 systems
1004  if rmf_file.exists():
1005  rh = RMF.open_rmf_file_read_only(str(rmf_file))
1006  system = state._pmi_object.system
1007  IMP.rmf.link_hierarchies(rh, [system.hier])
1008  IMP.rmf.load_frame(fh, RMF.FrameID(0))
1009  # todo: fill in other data from stat file, e.g. cross-link phi/psi
1010  yield stats
1011  model_num += 1
1012  if model_num >= self.num_models_deposited:
1013  return
1014 
1015  # todo: also support dRMS precision
1016  def _get_precision(self):
1017  precfile = (self.post_process.rex._outputdir /
1018  ("precision.%d.%d.out" % (self.cluster_num,
1019  self.cluster_num)))
1020  if not precfile.exists():
1021  return ihm.unknown
1022  # Fail if the precision.x.x.out file doesn't match the cluster
1023  r = re.compile(
1024  r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
1025  % self.cluster_num)
1026  with open(str(precfile)) as fh:
1027  for line in fh:
1028  m = r.match(line)
1029  if m:
1030  return float(m.group(1))
1031 
1032  precision = property(lambda self: self._get_precision(),
1033  lambda self, val: None)
1034 
1035 
1036 class _SimpleEnsemble(ihm.model.Ensemble):
1037  """Simple manually-created ensemble"""
1038 
1039  num_models_deposited = None # Override base class property
1040 
1041  def __init__(self, pp, model_group, num_models, drmsd,
1042  num_models_deposited, ensemble_file):
1043  super().__init__(
1044  model_group=model_group, post_process=pp, num_models=num_models,
1045  file=ensemble_file, precision=drmsd, name=model_group.name,
1046  clustering_feature='dRMSD')
1047  self.num_models_deposited = num_models_deposited
1048 
1049  def load_localization_density(self, state, component, asym, local_file):
1050  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1051  self.densities.append(den)
1052 
1053 
1054 class _CustomDNAAlphabet:
1055  """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1056  as in python-ihm)"""
1057  _comps = dict([cc.code_canonical, cc]
1058  for cc in ihm.DNAAlphabet._comps.values())
1059 
1060 
1061 class _EntityMapper(dict):
1062  """Handle mapping from IMP components (without copy number) to CIF
1063  entities. Multiple components may map to the same entity if they
1064  share sequence."""
1065  def __init__(self, system):
1066  super().__init__()
1067  self._sequence_dict = {}
1068  self._entities = []
1069  self.system = system
1070 
1071  def _get_alphabet(self, alphabet):
1072  """Map a PMI alphabet to an IHM alphabet"""
1073  # todo: we should probably ignore alphabets entirely and use
1074  # residue types directly (e.g. we shouldn't compare one-letter
1075  # code sequence to determine if an Entity is unique)
1076  alphabet_map = {None: ihm.LPeptideAlphabet,
1077  IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1078  IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1079  IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1080  if alphabet in alphabet_map:
1081  return alphabet_map[alphabet]
1082  else:
1083  raise TypeError("Don't know how to handle %s" % alphabet)
1084 
1085  def add(self, component_name, sequence, offset, alphabet):
1086  def entity_seq(sequence):
1087  # Map X to UNK
1088  if 'X' in sequence:
1089  return ['UNK' if s == 'X' else s for s in sequence]
1090  else:
1091  return sequence
1092  if sequence not in self._sequence_dict:
1093  # Use the name of the first component, stripped of any copy number,
1094  # as the description of the entity
1095  d = component_name.split("@")[0].split(".")[0]
1096  entity = Entity(entity_seq(sequence), description=d,
1097  pmi_offset=offset,
1098  alphabet=self._get_alphabet(alphabet))
1099  self.system.entities.append(entity)
1100  self._sequence_dict[sequence] = entity
1101  self[component_name] = self._sequence_dict[sequence]
1102 
1103 
1104 class _TransformedComponent:
1105  def __init__(self, name, original, transform):
1106  self.name, self.original, self.transform = name, original, transform
1107 
1108 
1109 class _SimpleRef:
1110  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1111  def __init__(self, ref):
1112  self.ref = ref
1113 
1114  def __call__(self):
1115  return self.ref
1116 
1117 
1118 class _State(ihm.model.State):
1119  """Representation of a single state in the system."""
1120 
1121  def __init__(self, pmi_object, po):
1122  # Point to the PMI object for this state. Use a weak reference
1123  # since the state object typically points to us too, so we need
1124  # to break the reference cycle. In PMI1 this will be a
1125  # Representation object; in PMI2 it is the PMI2 State object itself.
1126  self._pmi_object = weakref.proxy(pmi_object)
1127  if hasattr(pmi_object, 'state'):
1128  # Need a strong ref to pmi_object.state to prevent it from being
1129  # cleaned up when doing PMI1 multistate modeling
1130  self._pmi_state = _SimpleRef(pmi_object.state)
1131  else:
1132  self._pmi_state = weakref.ref(pmi_object)
1133  # Preserve PMI state name
1134  old_name = self.name
1135  super().__init__(experiment_type='Fraction of bulk')
1136  self.name = old_name
1137 
1138  # The assembly of all components modeled by IMP in this state.
1139  # This may be smaller than the complete assembly.
1140  self.modeled_assembly = ihm.Assembly(
1141  name="Modeled assembly",
1142  description=self.get_postfixed_name(
1143  "All components modeled by IMP"))
1144  po.system.orphan_assemblies.append(self.modeled_assembly)
1145 
1146  self.all_modeled_components = []
1147 
1148  def __hash__(self):
1149  return hash(self._pmi_state())
1150 
1151  def __eq__(self, other):
1152  return self._pmi_state() == other._pmi_state()
1153 
1154  def add_model_group(self, group):
1155  self.append(group)
1156  return group
1157 
1158  def get_prefixed_name(self, name):
1159  """Prefix the given name with the state name, if available."""
1160  if self.short_name:
1161  return self.short_name + ' ' + name
1162  else:
1163  # Can't use capitalize() since that un-capitalizes everything
1164  # but the first letter
1165  return name[0].upper() + name[1:] if name else ''
1166 
1167  def get_postfixed_name(self, name):
1168  """Postfix the given name with the state name, if available."""
1169  if self.short_name:
1170  return "%s in state %s" % (name, self.short_name)
1171  else:
1172  return name
1173 
1174  short_name = property(lambda self: self._pmi_state().short_name)
1175  long_name = property(lambda self: self._pmi_state().long_name)
1176 
1177  def __get_name(self):
1178  return self._pmi_state().long_name
1179 
1180  def __set_name(self, val):
1181  self._pmi_state().long_name = val
1182 
1183  name = property(__get_name, __set_name)
1184 
1185 
1186 class Entity(ihm.Entity):
1187  """A single entity in the system. This contains information (such as
1188  database identifiers) specific to a particular sequence rather than
1189  a copy (for example, when modeling a homodimer, two AsymUnits will
1190  point to the same Entity).
1191 
1192  This functions identically to the base ihm.Entity class, but it
1193  allows identifying residues by either the PMI numbering scheme
1194  (which is always contiguous starting at 1, covering the entire
1195  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1196  starts at 1, but which only covers the modeled subset of the full
1197  sequence, with non-modeled N-terminal or C-terminal residues
1198  removed). The actual offset (which is the integer to be added to the
1199  IHM numbering to get PMI numbering, or equivalently the number of
1200  not-represented N-terminal residues in the PMI sequence) is
1201  available in the `pmi_offset` member."""
1202  def __init__(self, sequence, pmi_offset, *args, **keys):
1203  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1204  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1205  self.pmi_offset = pmi_offset
1206  super().__init__(sequence, *args, **keys)
1207 
1208  def pmi_residue(self, res_id):
1209  """Return a single IHM residue indexed using PMI numbering"""
1210  return self.residue(res_id - self.pmi_offset)
1211 
1212  def pmi_range(self, res_id_begin, res_id_end):
1213  """Return a range of IHM residues indexed using PMI numbering"""
1214  off = self.pmi_offset
1215  return self(res_id_begin - off, res_id_end - off)
1216 
1217 
1218 class AsymUnit(ihm.AsymUnit):
1219  """A single asymmetric unit in the system. This roughly corresponds to
1220  a single PMI subunit (sequence, copy, or clone).
1221 
1222  This functions identically to the base ihm.AsymUnit class, but it
1223  allows identifying residues by either the PMI numbering scheme
1224  (which is always contiguous starting at 1, covering the entire
1225  sequence in the FASTA files, or the IHM scheme (seq_id, which also
1226  starts at 1, but which only covers the modeled subset of the full
1227  sequence, with non-modeled N-terminal or C-terminal residues
1228  removed).
1229 
1230  The `entity` member of this class points to an Entity object, which
1231  contains information (such as database identifiers) specific to
1232  a particular sequence rather than a copy (for example, when modeling
1233  a homodimer, two AsymUnits will point to the same Entity).
1234  """
1235 
1236  def __init__(self, entity, *args, **keys):
1237  super().__init__(
1238  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1239 
1240  def pmi_residue(self, res_id):
1241  """Return a single IHM residue indexed using PMI numbering"""
1242  return self.residue(res_id - self.entity.pmi_offset)
1243 
1244  def pmi_range(self, res_id_begin, res_id_end):
1245  """Return a range of IHM residues indexed using PMI numbering"""
1246  off = self.entity.pmi_offset
1247  return self(res_id_begin - off, res_id_end - off)
1248 
1249 
1251  """Class to encode a modeling protocol as mmCIF.
1252 
1253  IMP has basic support for writing out files in mmCIF format, for
1254  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1255  After creating an instance of this class, attach it to an
1256  IMP.pmi.topology.System object. After this, any
1257  generated models and metadata are automatically collected in the
1258  `system` attribute, which is an
1259  [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1260  Once the protocol is complete, call finalize() to make sure `system`
1261  contains everything, then use the
1262  [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1263  to write out files in mmCIF or BinaryCIF format.
1264 
1265  Each PMI subunit will be mapped to an IHM AsymUnit class which contains the
1266  subset of the sequence that was represented. Use the `asym_units` dict to
1267  get this object given a PMI subunit name. Each unique sequence will be
1268  mapped to an IHM Entity class (for example when modeling a homodimer
1269  there will be two AsymUnits which both point to the same Entity). Use
1270  the `entities` dict to get this object from a PMI subunit name.
1271 
1272  See also Entity, AsymUnit, get_handlers(), get_dumpers().
1273  """ # noqa: E501
1274  def __init__(self):
1275  # Ultimately, collect data in an ihm.System object
1276  self.system = ihm.System()
1277  self._state_group = ihm.model.StateGroup()
1278  self.system.state_groups.append(self._state_group)
1279 
1280  self._state_ensemble_offset = 0
1281  self._main_script = os.path.abspath(sys.argv[0])
1282 
1283  # Point to the main modeling script
1284  loc = ihm.location.WorkflowFileLocation(
1285  path=self._main_script,
1286  details="The main integrative modeling script")
1287  self.system.locations.append(loc)
1288 
1289  self._states = {}
1290  self.__asym_states = {}
1291  self._working_directory = os.getcwd()
1292  self.default_representation = self.create_representation(
1293  "Default representation")
1294  self.entities = _EntityMapper(self.system)
1295  # Mapping from component names to ihm.AsymUnit
1296  self.asym_units = {}
1297  self._all_components = {}
1298  self.all_modeled_components = []
1299  self._transformed_components = []
1300  self.sequence_dict = {}
1301 
1302  # Common geometric objects used in PMI systems
1303  self._xy_plane = ihm.geometry.XYPlane()
1304  self._xz_plane = ihm.geometry.XZPlane()
1305  self._z_axis = ihm.geometry.ZAxis()
1306  self._center_origin = ihm.geometry.Center(0, 0, 0)
1307  self._identity_transform = ihm.geometry.Transformation.identity()
1308 
1309  # Coordinates to exclude
1310  self._exclude_coords = {}
1311 
1312  self.all_representations = _AllModelRepresentations(self)
1313  self.all_protocols = _AllProtocols(self)
1314  self.all_datasets = _AllDatasets(self.system)
1315  self.all_starting_models = _AllStartingModels(self)
1316 
1317  self.all_software = _AllSoftware(self.system)
1318 
1319  def create_representation(self, name):
1320  """Create a new Representation and return it. This can be
1321  passed to add_model(), add_bead_element() or add_pdb_element()."""
1322  r = ihm.representation.Representation(name=name)
1323  self.system.orphan_representations.append(r)
1324  return r
1325 
1326  def exclude_coordinates(self, component, seqrange):
1327  """Don't record coordinates for the given domain.
1328  Coordinates for the given domain (specified by a component name
1329  and a 2-element tuple giving the start and end residue numbers)
1330  will be excluded from the mmCIF file. This can be used to exclude
1331  parts of the structure that weren't well resolved in modeling.
1332  Any bead or residue that lies wholly within this range will be
1333  excluded. Multiple ranges for a given component can be excluded
1334  by calling this method multiple times."""
1335  if component not in self._exclude_coords:
1336  self._exclude_coords[component] = []
1337  self._exclude_coords[component].append(seqrange)
1338 
1339  def _is_excluded(self, component, start, end):
1340  """Return True iff this chunk of sequence should be excluded"""
1341  for seqrange in self._exclude_coords.get(component, ()):
1342  if start >= seqrange[0] and end <= seqrange[1]:
1343  return True
1344 
1345  def _add_state(self, state):
1346  """Create a new state and return a pointer to it."""
1347  self._state_ensemble_offset = len(self.system.ensembles)
1348  s = _State(state, self)
1349  self._state_group.append(s)
1350  self._last_state = s
1351  return s
1352 
1353  def _get_chain_for_component(self, name, output):
1354  """Get the chain ID for a component, if any."""
1355  # todo: handle multiple copies
1356  if name in self.asym_units:
1357  return self.asym_units[name]._id
1358  else:
1359  # A non-modeled component doesn't have a chain ID
1360  return None
1361 
1362  def _get_assembly_comps(self, assembly):
1363  """Get the names of the components in the given assembly"""
1364  # component name = asym_unit.details
1365  comps = {}
1366  for ca in assembly:
1367  comps[ca.details] = None
1368  return comps
1369 
1370  def create_transformed_component(self, state, name, original, transform):
1371  """Make a new component that's a transformed copy of another.
1372  All representation for the existing component is copied to the
1373  new one."""
1374  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1375  if name in assembly_comps:
1376  raise ValueError("Component %s already exists" % name)
1377  elif original not in assembly_comps:
1378  raise ValueError("Original component %s does not exist" % original)
1379  self.create_component(state, name, True)
1380  self.add_component_sequence(state, name, self.sequence_dict[original])
1381  self._transformed_components.append(_TransformedComponent(
1382  name, original, transform))
1383  self.all_representations.copy_component(state, name, original,
1384  self.asym_units[name])
1385 
1386  def create_component(self, state, name, modeled, asym_name=None):
1387  if asym_name is None:
1388  asym_name = name
1389  new_comp = name not in self._all_components
1390  self._all_components[name] = None
1391  if modeled:
1392  state.all_modeled_components.append(name)
1393  if asym_name not in self.asym_units:
1394  # assign asym once we get sequence
1395  self.asym_units[asym_name] = None
1396  if new_comp:
1397  self.all_modeled_components.append(name)
1398 
1399  def add_component_sequence(self, state, name, seq, asym_name=None,
1400  alphabet=None):
1401  if asym_name is None:
1402  asym_name = name
1403 
1404  if name in self.sequence_dict:
1405  if self.sequence_dict[name] != seq:
1406  raise ValueError("Sequence mismatch for component %s" % name)
1407  else:
1408  self.sequence_dict[name] = seq
1409  # Offset is always zero to start with; this may be modified
1410  # later in finalize_build() if any non-modeled N-terminal
1411  # residues are removed
1412  self.entities.add(name, seq, 0, alphabet)
1413  if asym_name in self.asym_units:
1414  if self.asym_units[asym_name] is None:
1415  # Set up a new asymmetric unit for this component
1416  entity = self.entities[name]
1417  asym = AsymUnit(entity, details=asym_name)
1418  self.system.asym_units.append(asym)
1419  self.asym_units[asym_name] = asym
1420  state.modeled_assembly.append(self.asym_units[asym_name])
1421 
1422  def finalize_build(self):
1423  """Called immediately after the PMI system is built"""
1424  for entity in self.system.entities:
1425  _trim_unrep_termini(entity, self.system.asym_units,
1426  self.system.orphan_representations)
1427 
1428  def _add_restraint_model_fits(self):
1429  """Add fits to restraints for all known models"""
1430  for group, m in self.system._all_models():
1431  if m._is_restrained:
1432  for r in self.system.restraints:
1433  if hasattr(r, 'add_fits_from_model_statfile'):
1434  r.add_fits_from_model_statfile(m)
1435 
1436  def finalize(self):
1437  """Do any final processing on the class hierarchy.
1438  After calling this method, the `system` member (an instance
1439  of `ihm.System`) completely reproduces the PMI modeling, and
1440  can be written out to an mmCIF file with `ihm.dumper.write`,
1441  and/or modified using the ihm API."""
1442  self._add_restraint_model_fits()
1443 
1444  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1445  chain, hier, representation=None):
1446  if self._is_excluded(name, start, end):
1447  return
1448  if representation is None:
1449  representation = self.default_representation
1450  asym = self.asym_units[name]
1451  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1452  hier, asym)
1453  self.all_representations.add_fragment(state, representation, p)
1454  self.all_starting_models.add_pdb_fragment(p)
1455 
1456  def add_bead_element(self, state, name, start, end, num, hier,
1457  representation=None):
1458  if self._is_excluded(name, start, end):
1459  return
1460  if representation is None:
1461  representation = self.default_representation
1462  asym = self.asym_units[name]
1463  pmi_offset = asym.entity.pmi_offset
1464  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1465  num, hier, asym)
1466  self.all_representations.add_fragment(state, representation, b)
1467 
1468  def get_cross_link_group(self, pmi_restraint):
1469  r = _CrossLinkRestraint(pmi_restraint)
1470  self.system.restraints.append(r)
1471  self._add_restraint_dataset(r) # so that all-dataset group works
1472  return r
1473 
1474  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1475  if c1 not in self._all_components or c2 not in self._all_components:
1476  # Crosslink refers to a component we didn't model
1477  # As a quick hack, just ignore it.
1478  # todo: need to add an entity for this anyway (so will need the
1479  # sequence, and add to struct_assembly)
1480  return None
1481  e1 = self.entities[c1]
1482  e2 = self.entities[c2]
1483  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1484  residue2=e2.pmi_residue(r2))
1485  rsr.experimental_cross_links.append([xl])
1486  return xl
1487 
1488  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1489  psi_p, rsr):
1490  # Map p1/p2 to asym units
1491  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1492  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1493 
1494  if _get_by_residue(p1) and _get_by_residue(p2):
1495  cls = _ResidueCrossLink
1496  else:
1497  cls = _FeatureCrossLink
1498  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1499  restrain_all=False)
1500  # Needed to look up fits later
1501  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1502  rsr.cross_links.append(xl)
1503 
1504  def add_replica_exchange(self, state, rex):
1505  # todo: allow for metadata to say how many replicas were used in the
1506  # actual experiment, and how many independent runs were carried out
1507  # (use these as multipliers to get the correct total number of
1508  # output models)
1509  step = _ReplicaExchangeProtocolStep(state, rex)
1510  step.software = self.all_software.pmi
1511  self.all_protocols.add_step(step, state)
1512 
1513  def _add_simple_dynamics(self, num_models_end, method):
1514  # Always assumed that we're dealing with the last state
1515  state = self._last_state
1516  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1517  method), state)
1518 
1519  def _add_protocol(self):
1520  # Always assumed that we're dealing with the last state
1521  state = self._last_state
1522  self.all_protocols.add_protocol(state)
1523 
1524  def _add_dataset(self, dataset):
1525  return self.all_datasets.add(self._last_state, dataset)
1526 
1527  def _add_restraint_dataset(self, restraint):
1528  return self.all_datasets.add_restraint(self._last_state, restraint)
1529 
1530  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1531  # Always assumed that we're dealing with the last state
1532  state = self._last_state
1533  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1534  self.all_protocols.add_postproc(pp, state)
1535  return pp
1536 
1537  def _add_no_postprocessing(self, num_models):
1538  # Always assumed that we're dealing with the last state
1539  state = self._last_state
1540  pp = ihm.analysis.EmptyStep()
1541  pp.num_models_begin = pp.num_models_end = num_models
1542  self.all_protocols.add_postproc(pp, state)
1543  return pp
1544 
1545  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1546  num_models_deposited, localization_densities,
1547  ensemble_file):
1548  """Add an ensemble generated by ad hoc methods (not using PMI).
1549  This is currently only used by the Nup84 system."""
1550  # Always assumed that we're dealing with the last state
1551  state = self._last_state
1552  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1553  state.add_model_group(group)
1554  if ensemble_file:
1555  self.system.locations.append(ensemble_file)
1556  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1557  ensemble_file)
1558  self.system.ensembles.append(e)
1559  for c in state.all_modeled_components:
1560  den = localization_densities.get(c, None)
1561  if den:
1562  e.load_localization_density(state, c, self.asym_units[c], den)
1563  return e
1564 
1565  def set_ensemble_file(self, i, location):
1566  """Point a previously-created ensemble to an 'all-models' file.
1567  This could be a trajectory such as DCD, an RMF, or a multimodel
1568  PDB file."""
1569  self.system.locations.append(location)
1570  # Ensure that we point to an ensemble related to the current state
1571  ind = i + self._state_ensemble_offset
1572  self.system.ensembles[ind].file = location
1573 
1574  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1575  # todo: add prefilter as an additional postprocess step (complication:
1576  # we don't know how many models it filtered)
1577  # todo: postpone rmsf/density localization extraction until after
1578  # relevant methods are called (currently it is done from the
1579  # constructor)
1580  protocol = self.all_protocols.get_last_protocol(state)
1581  num_models = protocol.steps[-1].num_models_end
1582  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1583  pp.software = self.all_software.pmi
1584  self.all_protocols.add_postproc(pp, state)
1585  for i in range(rex._number_of_clusters):
1586  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1587  'cluster %d' % (i + 1)))
1588  state.add_model_group(group)
1589  # todo: make # of models to deposit configurable somewhere
1590  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1591  self.system.ensembles.append(e)
1592  # Add localization density info if available
1593  for fname, stuple in sorted(density_custom_ranges.items()):
1594  e.load_localization_density(state, fname, stuple,
1595  self.asym_units)
1596  for stats in e.load_all_models(self, state):
1597  m = self.add_model(group)
1598  # Since we currently only deposit 1 model, it is the
1599  # best scoring one
1600  m.name = 'Best scoring model'
1601  m.stats = stats
1602  # Add RMSF info if available
1603  for c in state.all_modeled_components:
1604  e.load_rmsf(m, c)
1605 
1606  def _get_subassembly(self, comps, name, description):
1607  """Get an Assembly consisting of the given components.
1608  `compdict` is a dictionary of the components to add, where keys
1609  are the component names and values are the sequence ranges (or
1610  None to use all residues in the component)."""
1611  asyms = []
1612  for comp, seqrng in comps.items():
1613  a = self.asym_units[comp]
1614  asyms.append(a if seqrng is None else a(*seqrng))
1615 
1616  a = ihm.Assembly(asyms, name=name, description=description)
1617  return a
1618 
1619  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1620  details):
1621  """Add a basic FoXS fit. This is largely intended for use from the
1622  NPC application."""
1623  assembly = self._get_subassembly(
1624  {comp: seqrange},
1625  name="SAXS subassembly",
1626  description="All components that fit SAXS data")
1627  r = ihm.restraint.SASRestraint(
1628  dataset, assembly, segment=False,
1629  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1630  multi_state=False, radius_of_gyration=rg, details=details)
1631  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1632  self.system.restraints.append(r)
1633  self._add_restraint_dataset(r) # so that all-dataset group works
1634 
1635  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1636  image_resolution, projection_number,
1637  micrographs_number):
1638  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1639  image_resolution, projection_number,
1640  micrographs_number)
1641  self.system.restraints.append(r)
1642  self._add_restraint_dataset(r) # so that all-dataset group works
1643 
1644  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1645  # todo: need to set allow_duplicates on this dataset?
1646  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1647  self.system.restraints.append(r)
1648  self._add_restraint_dataset(r) # so that all-dataset group works
1649 
1650  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1651  sigma, pmi_restraint):
1652  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1653  sigma, pmi_restraint, self._xy_plane)
1654 
1655  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1656  sigma, pmi_restraint):
1657  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1658  sigma, pmi_restraint, self._xz_plane)
1659 
1660  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1661  sigma, pmi_restraint):
1662  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1663  sigma, pmi_restraint, self._z_axis)
1664 
1665  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1666  sigma, pmi_restraint, geom):
1667  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1668  r = _GeometricRestraint(
1669  self, state, pmi_restraint, geom, asym.get_feature(ps),
1670  ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1671  upper_bound),
1672  sigma)
1673  self.system.restraints.append(r)
1674  self._add_restraint_dataset(r) # so that all-dataset group works
1675 
1676  def _get_membrane(self, tor_R, tor_r, tor_th):
1677  """Get an object representing a half-torus membrane"""
1678  if not hasattr(self, '_seen_membranes'):
1679  self._seen_membranes = {}
1680  # If a membrane already exists with all dimensions within 0.01 of
1681  # this one, reuse it
1682  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1683  if membrane_id not in self._seen_membranes:
1684  m = ihm.geometry.HalfTorus(
1685  center=self._center_origin,
1686  transformation=self._identity_transform,
1687  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1688  inner=True, name='Membrane')
1689  self._seen_membranes[membrane_id] = m
1690  return self._seen_membranes[membrane_id]
1691 
1692  def add_membrane_surface_location_restraint(
1693  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1694  self._add_membrane_restraint(
1695  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1696  ihm.restraint.UpperBoundDistanceRestraint(0.))
1697 
1698  def add_membrane_exclusion_restraint(
1699  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1700  self._add_membrane_restraint(
1701  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1702  ihm.restraint.LowerBoundDistanceRestraint(0.))
1703 
1704  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1705  sigma, pmi_restraint, rsr):
1706  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1707  r = _GeometricRestraint(
1708  self, state, pmi_restraint,
1709  self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1710  rsr, sigma)
1711  self.system.restraints.append(r)
1712  self._add_restraint_dataset(r) # so that all-dataset group works
1713 
1714  def add_model(self, group, assembly=None, representation=None):
1715  state = self._last_state
1716  if representation is None:
1717  representation = self.default_representation
1718  protocol = self.all_protocols.get_last_protocol(state)
1719  m = _Model(state.prot, self, protocol,
1720  assembly if assembly else state.modeled_assembly,
1721  representation)
1722  group.append(m)
1723  return m
1724 
1725 
1727  """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1728  This returns a list of custom dumpers that can be passed as all or
1729  part of the `dumpers` argument to ihm.dumper.write(). They add
1730  PMI-specific information to mmCIF or BinaryCIF files written out
1731  by python-ihm."""
1732  return [_ReplicaExchangeProtocolDumper]
1733 
1734 
1736  """Get custom python-ihm handlers for reading PMI data from mmCIF.
1737  This returns a list of custom handlers that can be passed as all or
1738  part of the `handlers` argument to ihm.reader.read(). They read
1739  PMI-specific information from mmCIF or BinaryCIF files read in
1740  by python-ihm."""
1741  return [_ReplicaExchangeProtocolHandler]
1742 
1743 
1744 class GMMParser(ihm.metadata.Parser):
1745  """Extract metadata from an EM density GMM file."""
1746 
1747  def parse_file(self, filename):
1748  """Extract metadata from `filename`.
1749  @return a dict with key `dataset` pointing to the GMM file and
1750  `number_of_gaussians` to the number of GMMs (or None)"""
1751  loc = ihm.location.InputFileLocation(
1752  filename,
1753  details="Electron microscopy density map, "
1754  "represented as a Gaussian Mixture Model (GMM)")
1755  # A 3DEM restraint's dataset ID uniquely defines the mmCIF
1756  # restraint, so we need to allow duplicates
1757  loc._allow_duplicates = True
1758  d = ihm.dataset.EMDensityDataset(loc)
1759  ret = {'dataset': d, 'number_of_gaussians': None}
1760 
1761  with open(filename) as fh:
1762  for line in fh:
1763  if line.startswith('# data_fn: '):
1764  p = ihm.metadata.MRCParser()
1765  fn = line[11:].rstrip('\r\n')
1766  dataset = p.parse_file(os.path.join(
1767  os.path.dirname(filename), fn))['dataset']
1768  ret['dataset'].parents.append(dataset)
1769  elif line.startswith('# ncenters: '):
1770  ret['number_of_gaussians'] = int(line[12:])
1771  return ret
1772 
1773 
1774 def _trim_unrep_termini(entity, asyms, representations):
1775  """Trim Entity sequence to only cover represented residues.
1776 
1777  PDB policy is for amino acid Entity sequences to be polymers (so
1778  they should include any loops or other gaps in the midst of the
1779  sequence) but for the termini to be trimmed of any not-modeled
1780  residues. Here, we modify the Entity sequence to remove any parts
1781  that are not included in any representation. This may change the
1782  numbering if any N-terminal residues are removed, and thus the offset
1783  between PMI and IHM numbering, as both count from 1."""
1784  rep_range = None
1785  for rep in representations:
1786  for seg in rep:
1787  if seg.asym_unit.entity is entity:
1788  seg_range = seg.asym_unit.seq_id_range
1789  if rep_range is None:
1790  rep_range = list(seg_range)
1791  else:
1792  rep_range[0] = min(rep_range[0], seg_range[0])
1793  rep_range[1] = max(rep_range[1], seg_range[1])
1794  # Do nothing if no representations or if everything is represented
1795  if rep_range is None or rep_range == [1, len(entity.sequence)]:
1796  return
1797  # Update offset (= number of unrepresented N terminal entity residues)
1798  # in entity and all asyms
1799  pmi_offset = rep_range[0] - 1
1800  entity.pmi_offset = pmi_offset
1801  for asym in asyms:
1802  if asym.entity is entity:
1803  asym.auth_seq_id_map = entity.pmi_offset
1804  # Modify entity sequence to only include represented residues
1805  entity.sequence = entity.sequence[rep_range[0] - 1:rep_range[1]]
1806 
1807  # If N terminal deletions, we must fix the numbering of all segments
1808  # and starting models
1809  if pmi_offset != 0:
1810  for rep in representations:
1811  for seg in rep:
1812  if seg.asym_unit.entity is entity:
1813  seg_range = seg.asym_unit.seq_id_range
1814  seg.asym_unit.seq_id_range = (seg_range[0] - pmi_offset,
1815  seg_range[1] - pmi_offset)
1816  if seg.starting_model:
1817  model = seg.starting_model
1818  seg_range = model.asym_unit.seq_id_range
1819  model.asym_unit.seq_id_range = \
1820  (seg_range[0] - pmi_offset,
1821  seg_range[1] - pmi_offset)
1822  model.offset = model.offset - pmi_offset
Select non water and non hydrogen atoms.
Definition: pdb.h:314
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
Definition: mmcif.py:1735
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1319
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1370
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1329
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
def finalize_build
Called immediately after the PMI system is built.
Definition: mmcif.py:1422
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1270
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1748
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1212
Miscellaneous utilities.
Definition: tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1744
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:749
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1240
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1231
A single entity in the system.
Definition: mmcif.py:1186
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1565
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1244
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1436
Base class for capturing a modeling protocol.
Definition: output.py:41
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Definition: mmcif.py:1726
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:199
3D rotation class.
Definition: Rotation3D.h:52
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1208
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...