IMP logo
IMP Reference Guide  2.10.0
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi.tools
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import textwrap
29 import weakref
30 import operator
31 import itertools
32 import ihm.location
33 import ihm.dataset
34 import ihm.dumper
35 import ihm.metadata
36 import ihm.startmodel
37 import ihm.model
38 import ihm.protocol
39 import ihm.analysis
40 import ihm.representation
41 import ihm.geometry
42 
43 def _assign_id(obj, seen_objs, obj_by_id):
44  """Assign a unique ID to obj, and track all ids in obj_by_id."""
45  if obj not in seen_objs:
46  if not hasattr(obj, 'id'):
47  obj_by_id.append(obj)
48  obj.id = len(obj_by_id)
49  seen_objs[obj] = obj.id
50  else:
51  obj.id = seen_objs[obj]
52 
53 
54 def _get_by_residue(p):
55  """Determine whether the given particle represents a specific residue
56  or a more coarse-grained object."""
58 
59 
60 class _ComponentMapper(object):
61  """Map a Particle to a component name"""
62  def __init__(self, prot):
63  self.o = IMP.pmi.output.Output()
64  self.prot = prot
65  self.name = 'cif-output'
66  self.o.dictionary_pdbs[self.name] = self.prot
67  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
68 
69  def __getitem__(self, p):
70  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
71  return protname
72 
73 
74 class _AsymMapper(object):
75  """Map a Particle to an asym_unit"""
76  def __init__(self, simo, prot):
77  self.simo = simo
78  self._cm = _ComponentMapper(prot)
79  self._seen_ranges = {}
80 
81  def __getitem__(self, p):
82  protname = self._cm[p]
83  return self.simo.asym_units[protname]
84 
85  def get_feature(self, ps):
86  """Get an ihm.restraint.Feature that covers the given particles"""
87  # todo: handle things other than residues
88  rngs = []
89  for p in ps:
90  asym = self[p]
91  # todo: handle overlapping ranges
93  rind = IMP.atom.Residue(p).get_index()
94  rng = asym(rind, rind)
96  # PMI Fragments always contain contiguous residues
97  rinds = IMP.atom.Fragment(p).get_residue_indexes()
98  rng = asym(rinds[0], rinds[-1])
99  else:
100  raise ValueError("Unsupported particle type %s" % str(p))
101  # Join contiguous ranges
102  if len(rngs) > 0 and rngs[-1].asym == asym \
103  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
104  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
105  rng.seq_id_range[1])
106  else:
107  rngs.append(rng)
108  # If an identical feature already exists, return that
109  # todo: python-ihm should handle this automatically for us
110  hrngs = tuple(rngs)
111  if hrngs in self._seen_ranges:
112  return self._seen_ranges[hrngs]
113  else:
114  feat = ihm.restraint.ResidueFeature(rngs)
115  self._seen_ranges[hrngs] = feat
116  return feat
117 
118 
119 class _AllSoftware(object):
120  def __init__(self, system):
121  self.system = system
122  self.modeller_used = self.phyre2_used = False
123  self.pmi = ihm.Software(
124  name="IMP PMI module",
125  version=IMP.pmi.__version__,
126  classification="integrative model building",
127  description="integrative model building",
128  location='https://integrativemodeling.org')
129  self.imp = ihm.Software(
130  name="Integrative Modeling Platform (IMP)",
131  version=IMP.__version__,
132  classification="integrative model building",
133  description="integrative model building",
134  location='https://integrativemodeling.org')
135  self.system.software.extend([self.pmi, self.imp])
136 
137  def set_modeller_used(self, version, date):
138  if self.modeller_used:
139  return
140  self.modeller_used = True
141  self.system.software.append(ihm.Software(
142  name='MODELLER', classification='comparative modeling',
143  description='Comparative modeling by satisfaction '
144  'of spatial restraints, build ' + date,
145  location='https://salilab.org/modeller/',
146  version=version))
147 
148  def set_phyre2_used(self):
149  if self.phyre2_used:
150  return
151  self.phyre2_used = True
152  self.system.software.append(ihm.Software(
153  name='Phyre2', classification='protein homology modeling',
154  description='Protein Homology/analogY Recognition '
155  'Engine V 2.0',
156  version='2.0',
157  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
158 
159 def _get_fragment_is_rigid(fragment):
160  """Determine whether a fragment is modeled rigidly"""
161  leaves = IMP.atom.get_leaves(fragment.hier)
162  # Assume all leaves are set up as rigid/flexible in the same way
163  return IMP.core.RigidMember.get_is_setup(leaves[0])
164 
165 class _PDBFragment(ihm.representation.ResidueSegment):
166  """Record details about part of a PDB file used as input
167  for a component."""
168  def __init__(self, state, component, start, end, pdb_offset,
169  pdbname, chain, hier, asym_unit):
170  # start, end are PMI residue indexes (not IHM)
171  super(_PDBFragment, self).__init__(
172  asym_unit=asym_unit.pmi_range(start, end),
173  rigid=None, primitive='sphere')
174  self.component, self.start, self.end, self.offset, self.pdbname \
175  = component, start, end, pdb_offset, pdbname
176  self.state, self.chain, self.hier = state, chain, hier
179  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
180 
181  rigid = property(lambda self: _get_fragment_is_rigid(self),
182  lambda self, val: None)
183 
184  def combine(self, other):
185  pass
186 
187 
188 class _BeadsFragment(ihm.representation.FeatureSegment):
189  """Record details about beads used to represent part of a component."""
190  chain = None
191  def __init__(self, state, component, start, end, count, hier, asym_unit):
192  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
193  rigid=None, primitive='sphere', count=count)
194  self.state, self.component, self.hier \
195  = state, component, hier
196 
197  rigid = property(lambda self: _get_fragment_is_rigid(self),
198  lambda self, val: None)
199 
200  def combine(self, other):
201  # todo: don't combine if one fragment is rigid and the other flexible
202  if type(other) == type(self) and \
203  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
204  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
205  other.asym_unit.seq_id_range[1])
206  self.count += other.count
207  return True
208 
209 
210 class _AllModelRepresentations(object):
211  def __init__(self, simo):
212  self.simo = simo
213  # dict of fragments, ordered by representation, then component name,
214  # then state
215  self.fragments = OrderedDict()
216  self._all_representations = {}
217 
218  def copy_component(self, state, name, original, asym_unit):
219  """Copy all representation for `original` in `state` to `name`"""
220  def copy_frag(f):
221  newf = copy.copy(f)
222  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
223  return newf
224  for rep in self.fragments:
225  if original in self.fragments[rep]:
226  if name not in self.fragments[rep]:
227  self.fragments[rep][name] = OrderedDict()
228  self.fragments[rep][name][state] = [copy_frag(f)
229  for f in self.fragments[rep][original][state]]
230  # Assume representation for a component is the same in all
231  # states, so only write out the first one
232  first_state = list(self.fragments[rep][name].keys())[0]
233  if state is first_state:
234  representation = self._all_representations[rep]
235  representation.extend(self.fragments[rep][name][state])
236 
237  def add_fragment(self, state, representation, fragment):
238  """Add a model fragment."""
239  comp = fragment.component
240  id_rep = id(representation)
241  self._all_representations[id_rep] = representation
242  if id_rep not in self.fragments:
243  self.fragments[id_rep] = OrderedDict()
244  if comp not in self.fragments[id_rep]:
245  self.fragments[id_rep][comp] = OrderedDict()
246  if state not in self.fragments[id_rep][comp]:
247  self.fragments[id_rep][comp][state] = []
248  fragments = self.fragments[id_rep][comp][state]
249  if len(fragments) == 0 or not fragments[-1].combine(fragment):
250  fragments.append(fragment)
251  # Assume representation for a component is the same in all states,
252  # so only write out the first one
253  first_state = list(self.fragments[id_rep][comp].keys())[0]
254  if state is first_state:
255  representation.append(fragment)
256 
257 
258 class _AllDatasets(object):
259  """Track all datasets generated by PMI and add them to the ihm.System"""
260  def __init__(self, system):
261  self.system = system
262  self._datasets = []
263  self._datasets_by_state = {}
264  self._restraints_by_state = {}
265 
266  def get_all_group(self, state):
267  """Get a DatasetGroup encompassing all datasets so far in this state"""
268  # Note that if a restraint dataset is replaced after this is called,
269  # the group will still contain the old dataset - mark dataset as read
270  # only?
271  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
272  + [r.dataset for r in self._restraints_by_state.get(state, [])
273  if r.dataset])
274  return g
275 
276  def add(self, state, dataset):
277  """Add a new dataset."""
278  self._datasets.append(dataset)
279  if state not in self._datasets_by_state:
280  self._datasets_by_state[state] = []
281  self._datasets_by_state[state].append(dataset)
282  # Make sure that the dataset ends up in the mmCIF file even if nothing
283  # refers to it
284  self.system.orphan_datasets.append(dataset)
285  return dataset
286 
287  def add_restraint(self, state, restraint):
288  """Add the dataset for a restraint"""
289  if state not in self._restraints_by_state:
290  self._restraints_by_state[state] = []
291  self._restraints_by_state[state].append(restraint)
292 
293 
294 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
295  """Restrain to a set of cross links"""
296 
297  assembly = None
298  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
299 
300  def __init__(self, pmi_restraint):
301  self.pmi_restraint = pmi_restraint
302  label = self.pmi_restraint.label
303  # Map commonly-used subtypes to more standard labels
304  label = self._label_map.get(label, label)
305  self.label = label
306  super(_CrossLinkRestraint, self).__init__(
307  dataset=self.pmi_restraint.dataset,
308  linker_type=label)
309 
310  def _set_psi_sigma(self, model):
311  old_values = []
312  # Do nothing if given a different state than the restraint applies to
313  if model.m != self.pmi_restraint.model:
314  return
315  for resolution in self.pmi_restraint.sigma_dictionary:
316  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
317  if model.stats and statname in model.stats:
318  sigma = float(model.stats[statname])
319  p = self.pmi_restraint.sigma_dictionary[resolution][0]
320  old_values.append((p, p.get_scale()))
321  p.set_scale(sigma)
322  for psiindex in self.pmi_restraint.psi_dictionary:
323  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
324  if model.stats and statname in model.stats:
325  psi = float(model.stats[statname])
326  p = self.pmi_restraint.psi_dictionary[psiindex][0]
327  old_values.append((p, p.get_scale()))
328  p.set_scale(psi)
329  # If the same particle is set multiple times, make sure we get the
330  # original value last
331  return list(reversed(old_values))
332 
333  def add_fits_from_model_statfile(self, model):
334  # Set psi/sigma for all particles
335  old_values = self._set_psi_sigma(model)
336  # If no stats were collected, we can't show the fit
337  if not old_values:
338  return
339  for xl in self.cross_links:
340  # Get current psi/sigma particle value for each XL
341  xl.fits[model] = ihm.restraint.CrossLinkFit(
342  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
343  # Restore original psi/sigma
344  for p, scale in old_values:
345  p.set_scale(scale)
346 
347  # Have our dataset point to that in the original PMI restraint
348  def __set_dataset(self, val):
349  self.pmi_restraint.dataset = val
350  dataset = property(lambda self: self.pmi_restraint.dataset,
351  __set_dataset)
352 
353 
354 def get_asym_mapper_for_state(simo, state, asym_map):
355  asym = asym_map.get(state, None)
356  if asym is None:
357  asym = _AsymMapper(simo, state.prot)
358  asym_map[state] = asym
359  return asym
360 
361 class _PMICrossLink(object):
362  # Query PMI particles to get psi and sigma values at time of use, not
363  # time of construction (since they may be set after creating the restraint)
364  psi = property(lambda self: self.psi_p.get_scale(),
365  lambda self, val: None)
366  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
367  lambda self, val: None)
368  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
369  lambda self, val: None)
370 
371 
372 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
373  pass
374 
375 
376 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
377  pass
378 
379 
380 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
381  def __init__(self, state, pmi_restraint, image_number, resolution,
382  pixel_size, image_resolution, projection_number,
383  micrographs_number):
384  self.pmi_restraint, self.image_number = pmi_restraint, image_number
385  super(_EM2DRestraint, self).__init__(
386  dataset=pmi_restraint.datasets[image_number],
387  assembly=state.modeled_assembly,
388  segment=False, number_raw_micrographs=micrographs_number,
389  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
390  image_resolution=image_resolution,
391  number_of_projections=projection_number)
392 
393  # Have our dataset point to that in the original PMI restraint
394  def __get_dataset(self):
395  return self.pmi_restraint.datasets[self.image_number]
396  def __set_dataset(self, val):
397  self.pmi_restraint.datasets[self.image_number] = val
398  dataset = property(__get_dataset, __set_dataset)
399 
400  def add_fits_from_model_statfile(self, model):
401  ccc = self._get_cross_correlation(model)
402  transform = self._get_transformation(model)
403  rot = transform.get_rotation()
404  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
405  self.fits[model] = ihm.restraint.EM2DRestraintFit(
406  cross_correlation_coefficient=ccc,
407  rot_matrix=rm,
408  tr_vector=transform.get_translation())
409 
410  def _get_transformation(self, model):
411  """Get the transformation that places the model on the image"""
412  stats = model.em2d_stats or model.stats
413  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
414  self.image_number + 1)
415  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
416  t = [float(stats[prefix + '_Translation%d' % i])
417  for i in range(3)]
418  # If the model coordinates are transformed, need to back transform
419  # them first
420  inv = model.transform.get_inverse()
422  IMP.algebra.Vector3D(*t)) * inv
423 
424  def _get_cross_correlation(self, model):
425  """Get the cross correlation coefficient between the model projection
426  and the image"""
427  stats = model.em2d_stats or model.stats
428  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
429  % (self.pmi_restraint.label,
430  self.image_number + 1)])
431 
432 
433 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
434 
435  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
436  self.pmi_restraint = pmi_restraint
437  super(_EM3DRestraint, self).__init__(
438  dataset=pmi_restraint.dataset,
439  assembly=self._get_assembly(densities, simo, state),
440  fitting_method='Gaussian mixture models',
441  number_of_gaussians=len(target_ps))
442 
443  # Have our dataset point to that in the original PMI restraint
444  def __set_dataset(self, val):
445  self.pmi_restraint.dataset = val
446  dataset = property(lambda self: self.pmi_restraint.dataset,
447  __set_dataset)
448 
449  def _get_assembly(self, densities, simo, state):
450  """Get the Assembly that this restraint acts on"""
451  cm = _ComponentMapper(state.prot)
452  components = {}
453  for d in densities:
454  components[cm[d]] = None # None == all residues in this component
455  a = simo._get_subassembly(components,
456  name="EM subassembly",
457  description="All components that fit the EM map")
458  return a
459 
460  def add_fits_from_model_statfile(self, model):
461  ccc = self._get_cross_correlation(model)
462  self.fits[model] = ihm.restraint.EM3DRestraintFit(
463  cross_correlation_coefficient=ccc)
464 
465  def _get_cross_correlation(self, model):
466  """Get the cross correlation coefficient between the model
467  and the map"""
468  if model.stats is not None:
469  return float(model.stats['GaussianEMRestraint_%s_CCC'
470  % self.pmi_restraint.label])
471 
472 
473 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
474 
475  def __init__(self, simo, state, pmi_restraint, geometric_object,
476  feature, distance, sigma):
477  self.pmi_restraint = pmi_restraint
478  super(_GeometricRestraint, self).__init__(
479  dataset=pmi_restraint.dataset,
480  geometric_object=geometric_object, feature=feature,
481  distance=distance, harmonic_force_constant = 1. / sigma,
482  restrain_all=True)
483 
484  # Have our dataset point to that in the original PMI restraint
485  def __set_dataset(self, val):
486  self.pmi_restraint.dataset = val
487  dataset = property(lambda self: self.pmi_restraint.dataset,
488  __set_dataset)
489 
490 
491 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
492  def __init__(self, state, rex):
493  if rex.monte_carlo_sample_objects is not None:
494  method = 'Replica exchange monte carlo'
495  else:
496  method = 'Replica exchange molecular dynamics'
497  super(_ReplicaExchangeProtocolStep, self).__init__(
498  assembly=state.modeled_assembly,
499  dataset_group=None, # filled in by add_step()
500  method=method, name='Sampling',
501  num_models_begin=None, # filled in by add_step()
502  num_models_end=rex.vars["number_of_frames"],
503  multi_scale=True, multi_state=False, ordered=False)
504 
505 
506 class _SimpleProtocolStep(ihm.protocol.Step):
507  def __init__(self, state, num_models_end, method):
508  super(_SimpleProtocolStep, self).__init__(
509  assembly=state.modeled_assembly,
510  dataset_group=None, # filled in by add_step()
511  method=method, name='Sampling',
512  num_models_begin=None, # filled in by add_step()
513  num_models_end=num_models_end,
514  multi_scale=True, multi_state=False, ordered=False)
515 
516 
517 class _Chain(object):
518  """Represent a single chain in a Model"""
519  def __init__(self, pmi_chain_id, asym_unit):
520  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
521  self.spheres = []
522  self.atoms = []
523 
524  def add(self, xyz, atom_type, residue_type, residue_index,
525  all_indexes, radius):
526  if atom_type is None:
527  self.spheres.append((xyz, residue_type, residue_index,
528  all_indexes, radius))
529  else:
530  self.atoms.append((xyz, atom_type, residue_type, residue_index,
531  all_indexes, radius))
532  orig_comp = property(lambda self: self.comp)
533 
534 class _TransformedChain(object):
535  """Represent a chain that is a transformed version of another"""
536  def __init__(self, orig_chain, asym_unit, transform):
537  self.orig_chain, self.asym_unit = orig_chain, asym_unit
538  self.transform = transform
539 
540  def __get_spheres(self):
541  for (xyz, residue_type, residue_index, all_indexes,
542  radius) in self.orig_chain.spheres:
543  yield (self.transform * xyz, residue_type, residue_index,
544  all_indexes, radius)
545  spheres = property(__get_spheres)
546 
547  def __get_atoms(self):
548  for (xyz, atom_type, residue_type, residue_index, all_indexes,
549  radius) in self.orig_chain.atoms:
550  yield (self.transform * xyz, atom_type, residue_type, residue_index,
551  all_indexes, radius)
552  atoms = property(__get_atoms)
553 
554  entity = property(lambda self: self.orig_chain.entity)
555  orig_comp = property(lambda self: self.orig_chain.comp)
556 
557 class _Excluder(object):
558  def __init__(self, component, simo):
559  self._seqranges = simo._exclude_coords.get(component, [])
560 
561  def is_excluded(self, indexes):
562  """Return True iff the given sequence range is excluded."""
563  for seqrange in self._seqranges:
564  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
565  return True
566 
567 
568 class _Model(ihm.model.Model):
569  def __init__(self, prot, simo, protocol, assembly, representation):
570  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
571  representation=representation)
572  self.simo = weakref.proxy(simo)
573  # Transformation from IMP coordinates into mmCIF coordinate system.
574  # Normally we pass through coordinates unchanged, but in some cases
575  # we may want to translate them (e.g. Nup84, where the deposited PDB
576  # files are all centered; we want the mmCIF files to match)
578  self.em2d_stats = None
579  self.stats = None
580  # True iff restraints act on this model
581  self._is_restrained = True
582  o = self.output = IMP.pmi.output.Output(atomistic=True)
583  name = 'cif-output'
584  self.m = prot.get_model()
585  o.dictionary_pdbs[name] = prot
586  o._init_dictchain(name, prot, multichar_chain=True)
587  (particle_infos_for_pdb,
588  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
589  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
590  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
591  self.rmsf = {}
592 
593  def all_chains(self, simo):
594  """Yield all chains, including transformed ones"""
595  chain_for_comp = {}
596  for c in self.chains:
597  yield c
598  chain_for_comp[c.comp] = c
599  for tc in simo._transformed_components:
600  orig_chain = chain_for_comp.get(tc.original, None)
601  if orig_chain:
602  asym = simo.asym_units[tc.name]
603  c = _TransformedChain(orig_chain, asym, tc.transform)
604  c.comp = tc.name
605  yield c
606 
607  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
608  entity_for_chain = {}
609  comp_for_chain = {}
610  correct_asym = {}
611  for protname, chain_id in o.dictchain[name].items():
612  if protname in simo.entities:
613  entity_for_chain[chain_id] = simo.entities[protname]
614  else:
615  # Remove copy number
616  pn = protname.split('.')[0]
617  entity_for_chain[chain_id] = simo.entities[pn]
618  comp_for_chain[chain_id] = protname
619  # When doing multi-state modeling, the chain ID returned here
620  # (assigned sequentially) might not be correct (states may have
621  # gaps in the chain IDs). Map it to the correct asym unit.
622  correct_asym[chain_id] = simo.asym_units[protname]
623 
624  # Gather by chain ID (should be sorted by chain ID already)
625  self.chains = []
626  chain = None
627  excluder = None
628 
629  for (xyz, atom_type, residue_type, chain_id, residue_index,
630  all_indexes, radius) in particle_infos_for_pdb:
631  if chain is None or chain.pmi_chain_id != chain_id:
632  chain = _Chain(chain_id, correct_asym[chain_id])
633  chain.entity = entity_for_chain[chain_id]
634  chain.comp = comp_for_chain[chain_id]
635  self.chains.append(chain)
636  excluder = _Excluder(chain.comp, simo)
637  if not excluder.is_excluded(all_indexes if all_indexes
638  else [residue_index]):
639  chain.add(xyz, atom_type, residue_type, residue_index,
640  all_indexes, radius)
641 
642  def parse_rmsf_file(self, fname, component):
643  self.rmsf[component] = rmsf = {}
644  with open(fname) as fh:
645  for line in fh:
646  resnum, blocknum, val = line.split()
647  rmsf[int(resnum)] = (int(blocknum), float(val))
648 
649  def get_rmsf(self, component, indexes):
650  """Get the RMSF value for the given residue indexes."""
651  if not self.rmsf:
652  return None
653  rmsf = self.rmsf[component]
654  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
655  if len(blocknums) != 1:
656  raise ValueError("Residue indexes %s aren't all in the same block"
657  % str(indexes))
658  return rmsf[indexes[0]][1]
659 
660  def get_atoms(self):
661  for chain in self.all_chains(self.simo):
662  pmi_offset = chain.asym_unit.entity.pmi_offset
663  for atom in chain.atoms:
664  (xyz, atom_type, residue_type, residue_index,
665  all_indexes, radius) = atom
666  pt = self.transform * xyz
667  yield ihm.model.Atom(asym_unit=chain.asym_unit,
668  seq_id=residue_index - pmi_offset,
669  atom_id=atom_type.get_string(),
670  type_symbol=None, # todo: get element
671  x=pt[0], y=pt[1], z=pt[2])
672 
673  def get_spheres(self):
674  for chain in self.all_chains(self.simo):
675  pmi_offset = chain.asym_unit.entity.pmi_offset
676  for sphere in chain.spheres:
677  (xyz, residue_type, residue_index,
678  all_indexes, radius) = sphere
679  if all_indexes is None:
680  all_indexes = (residue_index,)
681  pt = self.transform * xyz
682  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
683  seq_id_range=(all_indexes[0] - pmi_offset,
684  all_indexes[-1] - pmi_offset),
685  x=pt[0], y=pt[1], z=pt[2], radius=radius,
686  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
687 
688 
689 class _AllProtocols(object):
690  def __init__(self, simo):
691  self.simo = weakref.proxy(simo)
692  # Lists of Protocols by state
693  self.protocols = OrderedDict()
694 
695  def add_protocol(self, state):
696  """Add a new Protocol"""
697  if state not in self.protocols:
698  self.protocols[state] = []
699  p = ihm.protocol.Protocol()
700  self.simo.system.orphan_protocols.append(p)
701  self.protocols[state].append(p)
702 
703  def add_step(self, step, state):
704  """Add a ProtocolStep to the last Protocol of the given State"""
705  if state not in self.protocols:
706  self.add_protocol(state)
707  protocol = self.get_last_protocol(state)
708  if len(protocol.steps) == 0:
709  step.num_models_begin = 0
710  else:
711  step.num_models_begin = protocol.steps[-1].num_models_end
712  protocol.steps.append(step)
713  step.id = len(protocol.steps)
714  # Assume that protocol uses all currently-defined datasets
715  step.dataset_group = self.simo.all_datasets.get_all_group(state)
716 
717  def add_postproc(self, step, state):
718  """Add a postprocessing step to the last protocol"""
719  protocol = self.get_last_protocol(state)
720  if len(protocol.analyses) == 0:
721  protocol.analyses.append(ihm.analysis.Analysis())
722  protocol.analyses[-1].steps.append(step)
723 
724  def get_last_protocol(self, state):
725  """Return the most recently-added _Protocol"""
726  return self.protocols[state][-1]
727 
728 
729 class _AllStartingModels(object):
730  def __init__(self, simo):
731  self.simo = simo
732  # dict of starting models (entire PDB files), collected from fragments,
733  # ordered by component name and state
734  self.models = OrderedDict()
735  self.output = IMP.pmi.output.Output()
736 
737  def add_pdb_fragment(self, fragment):
738  """Add a starting model PDB fragment."""
739  comp = fragment.component
740  state = fragment.state
741  if comp not in self.models:
742  self.models[comp] = OrderedDict()
743  if state not in self.models[comp]:
744  self.models[comp][state] = []
745  models = self.models[comp][state]
746  if len(models) == 0 \
747  or models[-1].fragments[0].pdbname != fragment.pdbname:
748  model = self._add_model(fragment)
749  models.append(model)
750  else:
751  # Break circular ref between fragment and model
752  models[-1].fragments.append(weakref.proxy(fragment))
753  # Update residue range to cover all fragments
754  pmi_offset = models[-1].asym_unit.entity.pmi_offset
755  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
756  models[-1].asym_unit.seq_id_range[0])
757  sid_end = max(fragment.end + fragment.offset - pmi_offset,
758  models[-1].asym_unit.seq_id_range[1])
759  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
760  fragment.starting_model = models[-1]
761 
762  def _add_model(self, f):
763  parser = ihm.metadata.PDBParser()
764  r = parser.parse_file(f.pdbname)
765 
766  self.simo._add_dataset(r['dataset'])
767  # We only want the templates that model the starting model chain
768  templates = r['templates'].get(f.chain, [])
769  for t in templates:
770  if t.alignment_file:
771  self.simo.system.locations.append(t.alignment_file)
772  if t.dataset:
773  self.simo._add_dataset(t.dataset)
774  pmi_offset = f.asym_unit.entity.pmi_offset
775  m = _StartingModel(
776  asym_unit=f.asym_unit.asym.pmi_range(f.start + f.offset,
777  f.end + f.offset),
778  dataset=r['dataset'], asym_id=f.chain,
779  templates=templates, offset=f.offset + pmi_offset,
780  metadata=r['metadata'],
781  software=r['software'][0] if r['software'] else None,
782  script_file=r['script'])
783  m.fragments = [weakref.proxy(f)]
784  return m
785 
786 
787 class _StartingModel(ihm.startmodel.StartingModel):
788  def get_seq_dif(self):
789  return self._seq_dif
790 
791  def get_atoms(self):
792  pmi_offset = self.asym_unit.entity.pmi_offset
793  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
794  self.asym_unit)
795  for f in self.fragments:
796  sel = IMP.atom.Selection(f.starting_hier,
797  residue_indexes=list(range(f.start - f.offset,
798  f.end - f.offset + 1)))
799  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
800  f.offset - pmi_offset):
801  yield a
802  self._seq_dif = mh._seq_dif
803 
804 
805 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
806  """Post processing using AnalysisReplicaExchange0 macro"""
807 
808  def __init__(self, rex, num_models_begin):
809  self.rex = rex
810  num_models_end = 0
811  for fname in self.get_all_stat_files():
812  with open(fname) as fh:
813  num_models_end += len(fh.readlines())
814  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
815  feature='RMSD', num_models_begin=num_models_begin,
816  num_models_end=num_models_end)
817 
818  def get_stat_file(self, cluster_num):
819  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
820  'stat.out')
821 
822  def get_all_stat_files(self):
823  for i in range(self.rex._number_of_clusters):
824  yield self.get_stat_file(i)
825 
826 
827 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
828  """Ensemble generated using AnalysisReplicaExchange0 macro"""
829 
830  num_models_deposited = None # Override base class property
831 
832  def __init__(self, pp, cluster_num, model_group, num_deposit):
833  with open(pp.get_stat_file(cluster_num)) as fh:
834  num_models = len(fh.readlines())
835  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
836  num_models=num_models,
837  model_group=model_group, post_process=pp,
838  clustering_feature=pp.feature,
839  name=model_group.name)
840  self.cluster_num = cluster_num
841  self.num_models_deposited = num_deposit
842 
843  def get_rmsf_file(self, component):
844  return os.path.join(self.post_process.rex._outputdir,
845  'cluster.%d' % self.cluster_num,
846  'rmsf.%s.dat' % component)
847 
848  def load_rmsf(self, model, component):
849  fname = self.get_rmsf_file(component)
850  if os.path.exists(fname):
851  model.parse_rmsf_file(fname, component)
852 
853  def get_localization_density_file(self, fname):
854  return os.path.join(self.post_process.rex._outputdir,
855  'cluster.%d' % self.cluster_num,
856  '%s.mrc' % fname)
857 
858  def load_localization_density(self, state, fname, select_tuple, asym_units):
859  fullpath = self.get_localization_density_file(fname)
860  if os.path.exists(fullpath):
861  details = "Localization density for %s %s" \
862  % (fname, self.model_group.name)
863  local_file = ihm.location.OutputFileLocation(fullpath,
864  details=details)
865  for s in select_tuple:
866  if isinstance(s, tuple) and len(s) == 3:
867  asym = asym_units[s[2]].pmi_range(s[0], s[1])
868  else:
869  asym = asym_units[s]
870  den = ihm.model.LocalizationDensity(file=local_file,
871  asym_unit=asym)
872  self.densities.append(den)
873 
874  def load_all_models(self, simo, state):
875  stat_fname = self.post_process.get_stat_file(self.cluster_num)
876  model_num = 0
877  with open(stat_fname) as fh:
878  stats = ast.literal_eval(fh.readline())
879  # Correct path
880  rmf_file = os.path.join(os.path.dirname(stat_fname),
881  "%d.rmf3" % model_num)
882  for c in state.all_modeled_components:
883  # todo: this only works with PMI 1
884  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
885  force_rigid_update=True)
886  # todo: fill in other data from stat file, e.g. crosslink phi/psi
887  yield stats
888  model_num += 1
889  if model_num >= self.num_models_deposited:
890  return
891 
892  # todo: also support dRMS precision
893  def _get_precision(self):
894  precfile = os.path.join(self.post_process.rex._outputdir,
895  "precision.%d.%d.out" % (self.cluster_num,
896  self.cluster_num))
897  if not os.path.exists(precfile):
898  return ihm.unknown
899  # Fail if the precision.x.x.out file doesn't match the cluster
900  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
901  % self.cluster_num)
902  with open(precfile) as fh:
903  for line in fh:
904  m = r.match(line)
905  if m:
906  return float(m.group(1))
907 
908  precision = property(lambda self: self._get_precision(),
909  lambda self, val: None)
910 
911 class _SimpleEnsemble(ihm.model.Ensemble):
912  """Simple manually-created ensemble"""
913 
914  num_models_deposited = None # Override base class property
915 
916  def __init__(self, pp, model_group, num_models, drmsd,
917  num_models_deposited, ensemble_file):
918  super(_SimpleEnsemble, self).__init__(
919  model_group=model_group, post_process=pp, num_models=num_models,
920  file=ensemble_file, precision=drmsd, name=model_group.name,
921  clustering_feature='dRMSD')
922  self.num_models_deposited = num_models_deposited
923 
924  def load_localization_density(self, state, component, asym, local_file):
925  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
926  self.densities.append(den)
927 
928 
929 class _EntityMapper(dict):
930  """Handle mapping from IMP components (without copy number) to CIF entities.
931  Multiple components may map to the same entity if they share sequence."""
932  def __init__(self, system):
933  super(_EntityMapper, self).__init__()
934  self._sequence_dict = {}
935  self._entities = []
936  self.system = system
937 
938  def add(self, component_name, sequence, offset):
939  def entity_seq(sequence):
940  # Map X to UNK
941  if 'X' in sequence:
942  return ['UNK' if s == 'X' else s for s in sequence]
943  else:
944  return sequence
945  if sequence not in self._sequence_dict:
946  # Use the name of the first component, stripped of any copy number,
947  # as the description of the entity
948  d = component_name.split("@")[0].split(".")[0]
949  entity = Entity(entity_seq(sequence), description=d,
950  pmi_offset=offset)
951  self.system.entities.append(entity)
952  self._sequence_dict[sequence] = entity
953  self[component_name] = self._sequence_dict[sequence]
954 
955 
956 class _TransformedComponent(object):
957  def __init__(self, name, original, transform):
958  self.name, self.original, self.transform = name, original, transform
959 
960 
961 class _SimpleRef(object):
962  """Class with similar interface to weakref.ref, but keeps a strong ref"""
963  def __init__(self, ref):
964  self.ref = ref
965  def __call__(self):
966  return self.ref
967 
968 
969 class _State(ihm.model.State):
970  """Representation of a single state in the system."""
971 
972  def __init__(self, pmi_object, po):
973  # Point to the PMI object for this state. Use a weak reference
974  # since the state object typically points to us too, so we need
975  # to break the reference cycle. In PMI1 this will be a
976  # Representation object; in PMI2 it is the PMI2 State object itself.
977  self._pmi_object = weakref.proxy(pmi_object)
978  if hasattr(pmi_object, 'state'):
979  # Need a strong ref to pmi_object.state to prevent it from being
980  # cleaned up when doing PMI1 multistate modeling
981  self._pmi_state = _SimpleRef(pmi_object.state)
982  else:
983  self._pmi_state = weakref.ref(pmi_object)
984  # Preserve PMI state name
985  old_name = self.name
986  super(_State, self).__init__(experiment_type='Fraction of bulk')
987  self.name = old_name
988 
989  # The assembly of all components modeled by IMP in this state.
990  # This may be smaller than the complete assembly.
991  self.modeled_assembly = ihm.Assembly(
992  name="Modeled assembly",
993  description=self.get_postfixed_name(
994  "All components modeled by IMP"))
995  po.system.orphan_assemblies.append(self.modeled_assembly)
996 
997  self.all_modeled_components = []
998 
999  def __hash__(self):
1000  return hash(self._pmi_state())
1001  def __eq__(self, other):
1002  return self._pmi_state() == other._pmi_state()
1003 
1004  def add_model_group(self, group):
1005  self.append(group)
1006  return group
1007 
1008  def get_prefixed_name(self, name):
1009  """Prefix the given name with the state name, if available."""
1010  if self.short_name:
1011  return self.short_name + ' ' + name
1012  else:
1013  # Can't use capitalize() since that un-capitalizes everything
1014  # but the first letter
1015  return name[0].upper() + name[1:] if name else ''
1016 
1017  def get_postfixed_name(self, name):
1018  """Postfix the given name with the state name, if available."""
1019  if self.short_name:
1020  return "%s in state %s" % (name, self.short_name)
1021  else:
1022  return name
1023 
1024  short_name = property(lambda self: self._pmi_state().short_name)
1025  long_name = property(lambda self: self._pmi_state().long_name)
1026  def __get_name(self):
1027  return self._pmi_state().long_name
1028  def __set_name(self, val):
1029  self._pmi_state().long_name = val
1030  name = property(__get_name, __set_name)
1031 
1032 
1033 class Entity(ihm.Entity):
1034  """A single entity in the system.
1035  This functions identically to the base ihm.Entity class, but it
1036  allows identifying residues by either the IHM numbering scheme
1037  (seq_id, which is always contiguous starting at 1) or by the PMI
1038  scheme (which is similar, but may not start at 1 if the sequence in
1039  the FASTA file has one or more N-terminal gaps"""
1040  def __init__(self, sequence, pmi_offset, *args, **keys):
1041  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1042  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1043  self.pmi_offset = pmi_offset
1044  super(Entity, self).__init__(sequence, *args, **keys)
1045 
1046  def pmi_residue(self, res_id):
1047  """Return a single IHM residue indexed using PMI numbering"""
1048  return self.residue(res_id - self.pmi_offset)
1049 
1050  def pmi_range(self, res_id_begin, res_id_end):
1051  """Return a range of IHM residues indexed using PMI numbering"""
1052  off = self.pmi_offset
1053  return self(res_id_begin - off, res_id_end - off)
1054 
1055 
1056 class AsymUnit(ihm.AsymUnit):
1057  """A single asymmetric unit in the system.
1058  This functions identically to the base ihm.AsymUnit class, but it
1059  allows identifying residues by either the IHM numbering scheme
1060  (seq_id, which is always contiguous starting at 1) or by the PMI
1061  scheme (which is similar, but may not start at 1 if the sequence in
1062  the FASTA file has one or more N-terminal gaps"""
1063 
1064  def __init__(self, entity, *args, **keys):
1065  super(AsymUnit, self).__init__(
1066  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1067 
1068  def pmi_residue(self, res_id):
1069  """Return a single IHM residue indexed using PMI numbering"""
1070  return self.residue(res_id - self.entity.pmi_offset)
1071 
1072  def pmi_range(self, res_id_begin, res_id_end):
1073  """Return a range of IHM residues indexed using PMI numbering"""
1074  off = self.entity.pmi_offset
1075  return self(res_id_begin - off, res_id_end - off)
1076 
1077 
1079  """Class to encode a modeling protocol as mmCIF.
1080 
1081  IMP has basic support for writing out files in mmCIF format, for
1082  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1083  After creating an instance of this class, attach it to an
1084  IMP.pmi.representation.Representation object. After this, any
1085  generated models and metadata are automatically collected and
1086  output as mmCIF.
1087  """
1088  def __init__(self, fh):
1089  # Ultimately, collect data in an ihm.System object
1090  self.system = ihm.System()
1091  self._state_group = ihm.model.StateGroup()
1092  self.system.state_groups.append(self._state_group)
1093 
1094  self.fh = fh
1095  self._state_ensemble_offset = 0
1096  self._each_metadata = [] # list of metadata for each representation
1097  self._file_datasets = []
1098  self._main_script = os.path.abspath(sys.argv[0])
1099 
1100  # Point to the main modeling script
1101  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1102  details="The main integrative modeling script")
1103  self.system.locations.append(loc)
1104 
1105  self._states = {}
1106  self.__asym_states = {}
1107  self._working_directory = os.getcwd()
1108  self.default_representation = self.create_representation(
1109  "Default representation")
1110  self.entities = _EntityMapper(self.system)
1111  # Mapping from component names to ihm.AsymUnit
1112  self.asym_units = {}
1113  self._all_components = {}
1114  self.all_modeled_components = []
1115  self._transformed_components = []
1116  self.sequence_dict = {}
1117 
1118  # Common geometric objects used in PMI systems
1119  self._xy_plane = ihm.geometry.XYPlane()
1120  self._xz_plane = ihm.geometry.XZPlane()
1121  self._z_axis = ihm.geometry.ZAxis()
1122  self._center_origin = ihm.geometry.Center(0,0,0)
1123  self._identity_transform = ihm.geometry.Transformation.identity()
1124 
1125  # Coordinates to exclude
1126  self._exclude_coords = {}
1127 
1128  self.all_representations = _AllModelRepresentations(self)
1129  self.all_protocols = _AllProtocols(self)
1130  self.all_datasets = _AllDatasets(self.system)
1131  self.all_starting_models = _AllStartingModels(self)
1132 
1133  self.all_software = _AllSoftware(self.system)
1134 
1135  def create_representation(self, name):
1136  """Create a new Representation and return it. This can be
1137  passed to add_model(), add_bead_element() or add_pdb_element()."""
1138  r = ihm.representation.Representation()
1139  self.system.orphan_representations.append(r)
1140  return r
1141 
1142  def exclude_coordinates(self, component, seqrange):
1143  """Don't record coordinates for the given domain.
1144  Coordinates for the given domain (specified by a component name
1145  and a 2-element tuple giving the start and end residue numbers)
1146  will be excluded from the mmCIF file. This can be used to exclude
1147  parts of the structure that weren't well resolved in modeling.
1148  Any bead or residue that lies wholly within this range will be
1149  excluded. Multiple ranges for a given component can be excluded
1150  by calling this method multiple times."""
1151  if component not in self._exclude_coords:
1152  self._exclude_coords[component] = []
1153  self._exclude_coords[component].append(seqrange)
1154 
1155  def _is_excluded(self, component, start, end):
1156  """Return True iff this chunk of sequence should be excluded"""
1157  for seqrange in self._exclude_coords.get(component, ()):
1158  if start >= seqrange[0] and end <= seqrange[1]:
1159  return True
1160 
1161  def _add_state(self, state):
1162  """Create a new state and return a pointer to it."""
1163  self._state_ensemble_offset = len(self.system.ensembles)
1164  s = _State(state, self)
1165  self._state_group.append(s)
1166  self._last_state = s
1167  return s
1168 
1169  def get_file_dataset(self, fname):
1170  for d in self._file_datasets:
1171  fd = d.get(os.path.abspath(fname), None)
1172  if fd:
1173  return fd
1174 
1175  def _get_chain_for_component(self, name, output):
1176  """Get the chain ID for a component, if any."""
1177  # todo: handle multiple copies
1178  if name in self.asym_units:
1179  return self.asym_units[name]._id
1180  else:
1181  # A non-modeled component doesn't have a chain ID
1182  return None
1183 
1184  def _get_assembly_comps(self, assembly):
1185  """Get the names of the components in the given assembly"""
1186  # component name = asym_unit.details
1187  comps = {}
1188  for ca in assembly:
1189  comps[ca.details] = None
1190  return comps
1191 
1192  def create_transformed_component(self, state, name, original, transform):
1193  """Make a new component that's a transformed copy of another.
1194  All representation for the existing component is copied to the
1195  new one."""
1196  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1197  if name in assembly_comps:
1198  raise ValueError("Component %s already exists" % name)
1199  elif original not in assembly_comps:
1200  raise ValueError("Original component %s does not exist" % original)
1201  self.create_component(state, name, True)
1202  self.add_component_sequence(state, name, self.sequence_dict[original])
1203  self._transformed_components.append(_TransformedComponent(
1204  name, original, transform))
1205  self.all_representations.copy_component(state, name, original,
1206  self.asym_units[name])
1207 
1208  def create_component(self, state, name, modeled, asym_name=None):
1209  if asym_name is None:
1210  asym_name = name
1211  new_comp = name not in self._all_components
1212  self._all_components[name] = None
1213  if modeled:
1214  state.all_modeled_components.append(name)
1215  if new_comp:
1216  # assign asym once we get sequence
1217  self.asym_units[asym_name] = None
1218  self.all_modeled_components.append(name)
1219 
1220  def add_component_sequence(self, state, name, seq, asym_name=None):
1221  if asym_name is None:
1222  asym_name = name
1223  def get_offset(seq):
1224  # Count length of gaps at start of sequence
1225  for i in range(len(seq)):
1226  if seq[i] != '-':
1227  return seq[i:], i
1228  seq, offset = get_offset(seq)
1229  if name in self.sequence_dict:
1230  if self.sequence_dict[name] != seq:
1231  raise ValueError("Sequence mismatch for component %s" % name)
1232  else:
1233  self.sequence_dict[name] = seq
1234  self.entities.add(name, seq, offset)
1235  if asym_name in self.asym_units:
1236  if self.asym_units[asym_name] is None:
1237  # Set up a new asymmetric unit for this component
1238  entity = self.entities[name]
1239  asym = AsymUnit(entity, details=asym_name)
1240  self.system.asym_units.append(asym)
1241  self.asym_units[asym_name] = asym
1242  state.modeled_assembly.append(self.asym_units[asym_name])
1243 
1244  def _add_restraint_model_fits(self):
1245  """Add fits to restraints for all known models"""
1246  for group, m in self.system._all_models():
1247  if m._is_restrained:
1248  for r in self.system.restraints:
1249  if hasattr(r, 'add_fits_from_model_statfile'):
1250  r.add_fits_from_model_statfile(m)
1251 
1252  def flush(self, format='mmCIF'):
1253  """Write out all information to the file.
1254  Information can be written in any format supported by
1255  the ihm library (typically this is 'mmCIF' or 'BCIF')."""
1256  self.finalize()
1257  ihm.dumper.write(self.fh, [self.system], format)
1258 
1259  def finalize(self):
1260  """Do any final processing on the class hierarchy.
1261  After calling this method, the `system` member (an instance
1262  of `ihm.System`) completely reproduces the PMI modeling, and
1263  can be written out to an mmCIF file with `ihm.dumper.write`,
1264  and/or modified using the ihm API."""
1265  self._add_restraint_model_fits()
1266 
1267  # Add metadata to ihm.System
1268  self.system.software.extend(m for m in self._metadata
1269  if isinstance(m, ihm.Software))
1270  self.system.citations.extend(m for m in self._metadata
1271  if isinstance(m, ihm.Citation))
1272  self.system.locations.extend(m for m in self._metadata
1273  if isinstance(m, ihm.location.FileLocation))
1274 
1275  # Point all locations to repos, if applicable
1276  all_repos = [m for m in self._metadata
1277  if isinstance(m, ihm.location.Repository)]
1278  self.system.update_locations_in_repositories(all_repos)
1279 
1280  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1281  chain, hier, representation=None):
1282  if self._is_excluded(name, start, end):
1283  return
1284  if representation is None:
1285  representation = self.default_representation
1286  asym = self.asym_units[name]
1287  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1288  hier, asym)
1289  self.all_representations.add_fragment(state, representation, p)
1290  self.all_starting_models.add_pdb_fragment(p)
1291 
1292  def add_bead_element(self, state, name, start, end, num, hier,
1293  representation=None):
1294  if self._is_excluded(name, start, end):
1295  return
1296  if representation is None:
1297  representation = self.default_representation
1298  asym = self.asym_units[name]
1299  pmi_offset = asym.entity.pmi_offset
1300  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1301  num, hier, asym)
1302  self.all_representations.add_fragment(state, representation, b)
1303 
1304  def get_cross_link_group(self, pmi_restraint):
1305  r = _CrossLinkRestraint(pmi_restraint)
1306  self.system.restraints.append(r)
1307  self._add_restraint_dataset(r) # so that all-dataset group works
1308  return r
1309 
1310  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1311  if c1 not in self._all_components or c2 not in self._all_components:
1312  # Crosslink refers to a component we didn't model
1313  # As a quick hack, just ignore it.
1314  # todo: need to add an entity for this anyway (so will need the
1315  # sequence, and add to struct_assembly)
1316  return None
1317  e1 = self.entities[c1]
1318  e2 = self.entities[c2]
1319  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1320  residue2=e2.pmi_residue(r2))
1321  rsr.experimental_cross_links.append([xl])
1322  return xl
1323 
1324  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1325  psi_p, rsr):
1326  # Map p1/p2 to asym units
1327  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1328  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1329 
1330  if _get_by_residue(p1) and _get_by_residue(p2):
1331  cls = _ResidueCrossLink
1332  else:
1333  cls = _FeatureCrossLink
1334  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1335  restrain_all=True)
1336  # Needed to look up fits later
1337  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1338  rsr.cross_links.append(xl)
1339 
1340  def add_replica_exchange(self, state, rex):
1341  # todo: allow for metadata to say how many replicas were used in the
1342  # actual experiment, and how many independent runs were carried out
1343  # (use these as multipliers to get the correct total number of
1344  # output models)
1345  step = _ReplicaExchangeProtocolStep(state, rex)
1346  step.software = self.all_software.pmi
1347  self.all_protocols.add_step(step, state)
1348 
1349  def _add_simple_dynamics(self, num_models_end, method):
1350  # Always assumed that we're dealing with the last state
1351  state = self._last_state
1352  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1353  method), state)
1354 
1355  def _add_protocol(self):
1356  # Always assumed that we're dealing with the last state
1357  state = self._last_state
1358  self.all_protocols.add_protocol(state)
1359 
1360  def _add_dataset(self, dataset):
1361  return self.all_datasets.add(self._last_state, dataset)
1362 
1363  def _add_restraint_dataset(self, restraint):
1364  return self.all_datasets.add_restraint(self._last_state, restraint)
1365 
1366  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1367  # Always assumed that we're dealing with the last state
1368  state = self._last_state
1369  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1370  self.all_protocols.add_postproc(pp, state)
1371  return pp
1372 
1373  def _add_no_postprocessing(self, num_models):
1374  # Always assumed that we're dealing with the last state
1375  state = self._last_state
1376  pp = ihm.analysis.EmptyStep()
1377  pp.num_models_begin = pp.num_models_end = num_models
1378  self.all_protocols.add_postproc(pp, state)
1379  return pp
1380 
1381  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1382  num_models_deposited, localization_densities,
1383  ensemble_file):
1384  """Add an ensemble generated by ad hoc methods (not using PMI).
1385  This is currently only used by the Nup84 system."""
1386  # Always assumed that we're dealing with the last state
1387  state = self._last_state
1388  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1389  state.add_model_group(group)
1390  if ensemble_file:
1391  self.system.locations.append(ensemble_file)
1392  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1393  ensemble_file)
1394  self.system.ensembles.append(e)
1395  for c in state.all_modeled_components:
1396  den = localization_densities.get(c, None)
1397  if den:
1398  e.load_localization_density(state, c, self.asym_units[c], den)
1399  return e
1400 
1401  def set_ensemble_file(self, i, location):
1402  """Point a previously-created ensemble to an 'all-models' file.
1403  This could be a trajectory such as DCD, an RMF, or a multimodel
1404  PDB file."""
1405  self.system.locations.append(location)
1406  # Ensure that we point to an ensemble related to the current state
1407  ind = i + self._state_ensemble_offset
1408  self.system.ensembles[ind].file = location
1409 
1410  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1411  # todo: add prefilter as an additional postprocess step (complication:
1412  # we don't know how many models it filtered)
1413  # todo: postpone rmsf/density localization extraction until after
1414  # relevant methods are called (currently it is done from the
1415  # constructor)
1416  protocol = self.all_protocols.get_last_protocol(state)
1417  num_models = protocol.steps[-1].num_models_end
1418  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1419  pp.software = self.all_software.pmi
1420  self.all_protocols.add_postproc(pp, state)
1421  for i in range(rex._number_of_clusters):
1422  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1423  'cluster %d' % (i + 1)))
1424  state.add_model_group(group)
1425  # todo: make # of models to deposit configurable somewhere
1426  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1427  self.system.ensembles.append(e)
1428  # Add localization density info if available
1429  for fname, stuple in sorted(density_custom_ranges.items()):
1430  e.load_localization_density(state, fname, stuple,
1431  self.asym_units)
1432  for stats in e.load_all_models(self, state):
1433  m = self.add_model(group)
1434  # Since we currently only deposit 1 model, it is the
1435  # best scoring one
1436  m.name = 'Best scoring model'
1437  m.stats = stats
1438  # Add RMSF info if available
1439  for c in state.all_modeled_components:
1440  e.load_rmsf(m, c)
1441 
1442  def _get_subassembly(self, comps, name, description):
1443  """Get an Assembly consisting of the given components.
1444  `compdict` is a dictionary of the components to add, where keys
1445  are the component names and values are the sequence ranges (or
1446  None to use all residues in the component)."""
1447  asyms = []
1448  for comp, seqrng in comps.items():
1449  a = self.asym_units[comp]
1450  asyms.append(a if seqrng is None else a(*seqrng))
1451 
1452  a = ihm.Assembly(asyms, name=name, description=description)
1453  return a
1454 
1455  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1456  details):
1457  """Add a basic FoXS fit. This is largely intended for use from the
1458  NPC application."""
1459  assembly = self._get_subassembly({comp:seqrange},
1460  name="SAXS subassembly",
1461  description="All components that fit SAXS data")
1462  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1463  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1464  multi_state=False, radius_of_gyration=rg, details=details)
1465  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1466  self.system.restraints.append(r)
1467  self._add_restraint_dataset(r) # so that all-dataset group works
1468 
1469  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1470  image_resolution, projection_number,
1471  micrographs_number):
1472  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1473  image_resolution, projection_number,
1474  micrographs_number)
1475  self.system.restraints.append(r)
1476  self._add_restraint_dataset(r) # so that all-dataset group works
1477 
1478  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1479  # todo: need to set allow_duplicates on this dataset?
1480  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1481  self.system.restraints.append(r)
1482  self._add_restraint_dataset(r) # so that all-dataset group works
1483 
1484  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1485  sigma, pmi_restraint):
1486  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1487  sigma, pmi_restraint, self._xy_plane)
1488 
1489  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1490  sigma, pmi_restraint):
1491  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1492  sigma, pmi_restraint, self._xz_plane)
1493 
1494  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1495  sigma, pmi_restraint):
1496  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1497  sigma, pmi_restraint, self._z_axis)
1498 
1499  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1500  sigma, pmi_restraint, geom):
1501  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1502  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1503  asym.get_feature(ps),
1504  ihm.restraint.LowerUpperBoundDistanceRestraint(
1505  lower_bound, upper_bound),
1506  sigma)
1507  self.system.restraints.append(r)
1508  self._add_restraint_dataset(r) # so that all-dataset group works
1509 
1510  def _get_membrane(self, tor_R, tor_r, tor_th):
1511  """Get an object representing a half-torus membrane"""
1512  if not hasattr(self, '_seen_membranes'):
1513  self._seen_membranes = {}
1514  # If a membrane already exists with all dimensions within 0.01 of
1515  # this one, reuse it
1516  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1517  if membrane_id not in self._seen_membranes:
1518  m = ihm.geometry.HalfTorus(center=self._center_origin,
1519  transformation=self._identity_transform,
1520  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1521  inner=True, name='Membrane')
1522  self._seen_membranes[membrane_id] = m
1523  return self._seen_membranes[membrane_id]
1524 
1525  def add_membrane_surface_location_restraint(
1526  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1527  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1528  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1529 
1530  def add_membrane_exclusion_restraint(
1531  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1532  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1533  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1534 
1535  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1536  sigma, pmi_restraint, rsr):
1537  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1538  r = _GeometricRestraint(self, state, pmi_restraint,
1539  self._get_membrane(tor_R, tor_r, tor_th),
1540  asym.get_feature(ps), rsr, sigma)
1541  self.system.restraints.append(r)
1542  self._add_restraint_dataset(r) # so that all-dataset group works
1543 
1544  def add_model(self, group, assembly=None, representation=None):
1545  state = self._last_state
1546  if representation is None:
1547  representation = self.default_representation
1548  protocol = self.all_protocols.get_last_protocol(state)
1549  m = _Model(state.prot, self, protocol,
1550  assembly if assembly else state.modeled_assembly,
1551  representation)
1552  group.append(m)
1553  return m
1554 
1555  def _update_locations(self, filelocs):
1556  """Update FileLocation to point to a parent repository, if any"""
1557  all_repos = [m for m in self._metadata
1558  if isinstance(m, ihm.location.Repository)]
1559  for fileloc in filelocs:
1560  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1561 
1562  _metadata = property(lambda self:
1563  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1135
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1192
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1145
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1084
def flush
Write out all information to the file.
Definition: mmcif.py:1252
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1050
Miscellaneous utilities.
Definition: tools.py:1
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:690
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1068
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1060
A single entity in the system.
Definition: mmcif.py:1033
Representation of the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1401
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1072
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1259
Base class for capturing a modeling protocol.
Definition: output.py:40
Basic utilities for handling cryo-electron microscopy 3D density maps.
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:62
3D rotation class.
Definition: Rotation3D.h:46
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:134
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1046
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...