IMP logo
IMP Reference Guide  2.15.0
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import RMF
15 import IMP.core
16 import IMP.algebra
17 import IMP.atom
18 import IMP.em
19 import IMP.isd
20 import IMP.pmi.tools
21 import IMP.pmi.alphabets
22 from IMP.pmi.tools import OrderedDict
23 import IMP.pmi.output
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import weakref
30 import ihm.location
31 import ihm.dataset
32 import ihm.dumper
33 import ihm.reader
34 import ihm.metadata
35 import ihm.startmodel
36 import ihm.model
37 import ihm.protocol
38 import ihm.analysis
39 import ihm.representation
40 import ihm.geometry
41 import ihm.cross_linkers
42 
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper(object):
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
69 
70  def __getitem__(self, p):
71  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
72  return protname
73 
74 
75 class _AsymMapper(object):
76  """Map a Particle to an asym_unit"""
77  def __init__(self, simo, prot):
78  self.simo = simo
79  self._cm = _ComponentMapper(prot)
80  self._seen_ranges = {}
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rng = asym(rind, rind)
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rng = asym(rinds[0], rinds[-1])
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  # Join contiguous ranges
103  if len(rngs) > 0 and rngs[-1].asym == asym \
104  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
105  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
106  rng.seq_id_range[1])
107  else:
108  rngs.append(rng)
109  # If an identical feature already exists, return that
110  # todo: python-ihm should handle this automatically for us
111  hrngs = tuple(rngs)
112  if hrngs in self._seen_ranges:
113  return self._seen_ranges[hrngs]
114  else:
115  feat = ihm.restraint.ResidueFeature(rngs)
116  self._seen_ranges[hrngs] = feat
117  return feat
118 
119 
120 class _AllSoftware(object):
121  def __init__(self, system):
122  self.system = system
123  self.modeller_used = self.phyre2_used = False
124  self.pmi = ihm.Software(
125  name="IMP PMI module",
126  version=IMP.pmi.__version__,
127  classification="integrative model building",
128  description="integrative model building",
129  location='https://integrativemodeling.org')
130  self.imp = ihm.Software(
131  name="Integrative Modeling Platform (IMP)",
132  version=IMP.__version__,
133  classification="integrative model building",
134  description="integrative model building",
135  location='https://integrativemodeling.org')
136  self.system.software.extend([self.pmi, self.imp])
137 
138  def set_modeller_used(self, version, date):
139  if self.modeller_used:
140  return
141  self.modeller_used = True
142  self.system.software.append(ihm.Software(
143  name='MODELLER', classification='comparative modeling',
144  description='Comparative modeling by satisfaction '
145  'of spatial restraints, build ' + date,
146  location='https://salilab.org/modeller/',
147  version=version))
148 
149  def set_phyre2_used(self):
150  if self.phyre2_used:
151  return
152  self.phyre2_used = True
153  self.system.software.append(ihm.Software(
154  name='Phyre2', classification='protein homology modeling',
155  description='Protein Homology/analogY Recognition '
156  'Engine V 2.0',
157  version='2.0',
158  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
159 
160 
161 def _get_fragment_is_rigid(fragment):
162  """Determine whether a fragment is modeled rigidly"""
163  leaves = IMP.atom.get_leaves(fragment.hier)
164  # Assume all leaves are set up as rigid/flexible in the same way
165  return IMP.core.RigidMember.get_is_setup(leaves[0])
166 
167 
168 class _PDBFragment(ihm.representation.ResidueSegment):
169  """Record details about part of a PDB file used as input
170  for a component."""
171  def __init__(self, state, component, start, end, pdb_offset,
172  pdbname, chain, hier, asym_unit):
173  # start, end are PMI residue indexes (not IHM)
174  super(_PDBFragment, self).__init__(
175  asym_unit=asym_unit.pmi_range(start, end),
176  rigid=None, primitive='sphere')
177  self.component, self.start, self.end, self.offset, self.pdbname \
178  = component, start, end, pdb_offset, pdbname
179  self.state, self.chain, self.hier = state, chain, hier
182  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
183 
184  rigid = property(lambda self: _get_fragment_is_rigid(self),
185  lambda self, val: None)
186 
187  def combine(self, other):
188  pass
189 
190 
191 class _BeadsFragment(ihm.representation.FeatureSegment):
192  """Record details about beads used to represent part of a component."""
193  chain = None
194 
195  def __init__(self, state, component, start, end, count, hier, asym_unit):
196  super(_BeadsFragment, self).__init__(
197  asym_unit=asym_unit(start, end), rigid=None, primitive='sphere',
198  count=count)
199  self.state, self.component, self.hier = state, component, hier
200 
201  rigid = property(lambda self: _get_fragment_is_rigid(self),
202  lambda self, val: None)
203 
204  def combine(self, other):
205  # todo: don't combine if one fragment is rigid and the other flexible
206  if (type(other) == type(self) and
207  other.asym_unit.seq_id_range[0]
208  == self.asym_unit.seq_id_range[1] + 1):
209  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
210  other.asym_unit.seq_id_range[1])
211  self.count += other.count
212  return True
213 
214 
215 class _AllModelRepresentations(object):
216  def __init__(self, simo):
217  self.simo = simo
218  # dict of fragments, ordered by representation, then component name,
219  # then state
220  self.fragments = OrderedDict()
221  self._all_representations = {}
222 
223  def copy_component(self, state, name, original, asym_unit):
224  """Copy all representation for `original` in `state` to `name`"""
225  def copy_frag(f):
226  newf = copy.copy(f)
227  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
228  return newf
229  for rep in self.fragments:
230  if original in self.fragments[rep]:
231  if name not in self.fragments[rep]:
232  self.fragments[rep][name] = OrderedDict()
233  self.fragments[rep][name][state] = [
234  copy_frag(f) for f in self.fragments[rep][original][state]]
235  # Assume representation for a component is the same in all
236  # states, so only write out the first one
237  first_state = list(self.fragments[rep][name].keys())[0]
238  if state is first_state:
239  representation = self._all_representations[rep]
240  representation.extend(self.fragments[rep][name][state])
241 
242  def add_fragment(self, state, representation, fragment):
243  """Add a model fragment."""
244  comp = fragment.component
245  id_rep = id(representation)
246  self._all_representations[id_rep] = representation
247  if id_rep not in self.fragments:
248  self.fragments[id_rep] = OrderedDict()
249  if comp not in self.fragments[id_rep]:
250  self.fragments[id_rep][comp] = OrderedDict()
251  if state not in self.fragments[id_rep][comp]:
252  self.fragments[id_rep][comp][state] = []
253  fragments = self.fragments[id_rep][comp][state]
254  if len(fragments) == 0 or not fragments[-1].combine(fragment):
255  fragments.append(fragment)
256  # Assume representation for a component is the same in all states,
257  # so only write out the first one
258  first_state = list(self.fragments[id_rep][comp].keys())[0]
259  if state is first_state:
260  representation.append(fragment)
261 
262 
263 class _AllDatasets(object):
264  """Track all datasets generated by PMI and add them to the ihm.System"""
265  def __init__(self, system):
266  self.system = system
267  self._datasets = []
268  self._datasets_by_state = {}
269  self._restraints_by_state = {}
270 
271  def get_all_group(self, state):
272  """Get a DatasetGroup encompassing all datasets so far in this state"""
273  # Note that if a restraint dataset is replaced after this is called,
274  # the group will still contain the old dataset - mark dataset as read
275  # only?
276  g = ihm.dataset.DatasetGroup(
277  self._datasets_by_state.get(state, [])
278  + [r.dataset for r in self._restraints_by_state.get(state, [])
279  if r.dataset])
280  return g
281 
282  def add(self, state, dataset):
283  """Add a new dataset."""
284  self._datasets.append(dataset)
285  if state not in self._datasets_by_state:
286  self._datasets_by_state[state] = []
287  self._datasets_by_state[state].append(dataset)
288  # Make sure that the dataset ends up in the mmCIF file even if nothing
289  # refers to it
290  self.system.orphan_datasets.append(dataset)
291  return dataset
292 
293  def add_restraint(self, state, restraint):
294  """Add the dataset for a restraint"""
295  if state not in self._restraints_by_state:
296  self._restraints_by_state[state] = []
297  self._restraints_by_state[state].append(restraint)
298 
299 
300 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
301  """Restrain to a set of cross-links"""
302 
303  assembly = None
304  _label_map = {'wtDSS': 'DSS', 'scDSS': 'DSS', 'scEDC': 'EDC'}
305  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
306  'EDC': ihm.cross_linkers.edc}
307 
308  def __init__(self, pmi_restraint):
309  self.pmi_restraint = pmi_restraint
310  # Use ChemDescriptor from PMI restraint if available, otherwise guess
311  # it using the label
312  linker = getattr(self.pmi_restraint, 'linker', None)
313  label = self.pmi_restraint.label
314  self.label = label
315  super(_CrossLinkRestraint, self).__init__(
316  dataset=self.pmi_restraint.dataset,
317  linker=linker or self._get_chem_descriptor(label))
318 
319  @classmethod
320  def _get_chem_descriptor(cls, label):
321  # Map commonly-used subtypes to more standard labels
322  label = cls._label_map.get(label, label)
323  if label not in cls._descriptor_map:
324  # If label is not a standard linker type, make a new chemical
325  # descriptor containing just the name. We don't know the chemistry
326  # so cannot specify a SMILES or INCHI string for it at this point
327  d = ihm.ChemDescriptor(label)
328  cls._descriptor_map[label] = d
329  return cls._descriptor_map[label]
330 
331  def _set_psi_sigma(self, model):
332  old_values = []
333  # Do nothing if given a different state than the restraint applies to
334  if model.m != self.pmi_restraint.model:
335  return
336  for resolution in self.pmi_restraint.sigma_dictionary:
337  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
338  if model.stats and statname in model.stats:
339  sigma = float(model.stats[statname])
340  p = self.pmi_restraint.sigma_dictionary[resolution][0]
341  old_values.append((p, p.get_scale()))
342  p.set_scale(sigma)
343  for psiindex in self.pmi_restraint.psi_dictionary:
344  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
345  if model.stats and statname in model.stats:
346  psi = float(model.stats[statname])
347  p = self.pmi_restraint.psi_dictionary[psiindex][0]
348  old_values.append((p, p.get_scale()))
349  p.set_scale(psi)
350  # If the same particle is set multiple times, make sure we get the
351  # original value last
352  return list(reversed(old_values))
353 
354  def add_fits_from_model_statfile(self, model):
355  # Set psi/sigma for all particles
356  old_values = self._set_psi_sigma(model)
357  # If no stats were collected, we can't show the fit
358  if not old_values:
359  return
360  for xl in self.cross_links:
361  # Get current psi/sigma particle value for each XL
362  xl.fits[model] = ihm.restraint.CrossLinkFit(
363  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
364  # Restore original psi/sigma
365  for p, scale in old_values:
366  p.set_scale(scale)
367 
368  # Have our dataset point to that in the original PMI restraint
369  def __set_dataset(self, val):
370  self.pmi_restraint.dataset = val
371  dataset = property(lambda self: self.pmi_restraint.dataset,
372  __set_dataset)
373 
374 
375 def get_asym_mapper_for_state(simo, state, asym_map):
376  asym = asym_map.get(state, None)
377  if asym is None:
378  asym = _AsymMapper(simo, state.prot)
379  asym_map[state] = asym
380  return asym
381 
382 
383 class _PMICrossLink(object):
384  # Query PMI particles to get psi and sigma values at time of use, not
385  # time of construction (since they may be set after creating the restraint)
386  psi = property(lambda self: self.psi_p.get_scale(),
387  lambda self, val: None)
388  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
389  lambda self, val: None)
390  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
391  lambda self, val: None)
392 
393 
394 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
395  pass
396 
397 
398 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
399  pass
400 
401 
402 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
403  def __init__(self, state, pmi_restraint, image_number, resolution,
404  pixel_size, image_resolution, projection_number,
405  micrographs_number):
406  self.pmi_restraint, self.image_number = pmi_restraint, image_number
407  super(_EM2DRestraint, self).__init__(
408  dataset=pmi_restraint.datasets[image_number],
409  assembly=state.modeled_assembly,
410  segment=False, number_raw_micrographs=micrographs_number,
411  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
412  image_resolution=image_resolution,
413  number_of_projections=projection_number)
414 
415  # Have our dataset point to that in the original PMI restraint
416  def __get_dataset(self):
417  return self.pmi_restraint.datasets[self.image_number]
418 
419  def __set_dataset(self, val):
420  self.pmi_restraint.datasets[self.image_number] = val
421 
422  dataset = property(__get_dataset, __set_dataset)
423 
424  def add_fits_from_model_statfile(self, model):
425  ccc = self._get_cross_correlation(model)
426  transform = self._get_transformation(model)
427  rot = transform.get_rotation()
428  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
429  self.fits[model] = ihm.restraint.EM2DRestraintFit(
430  cross_correlation_coefficient=ccc,
431  rot_matrix=rm,
432  tr_vector=transform.get_translation())
433 
434  def _get_transformation(self, model):
435  """Get the transformation that places the model on the image"""
436  stats = model.em2d_stats or model.stats
437  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
438  self.image_number + 1)
439  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
440  t = [float(stats[prefix + '_Translation%d' % i])
441  for i in range(3)]
442  # If the model coordinates are transformed, need to back transform
443  # them first
444  inv = model.transform.get_inverse()
446  IMP.algebra.Vector3D(*t)) * inv
447 
448  def _get_cross_correlation(self, model):
449  """Get the cross correlation coefficient between the model projection
450  and the image"""
451  stats = model.em2d_stats or model.stats
452  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
453  % (self.pmi_restraint.label,
454  self.image_number + 1)])
455 
456 
457 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
458 
459  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
460  self.pmi_restraint = pmi_restraint
461  super(_EM3DRestraint, self).__init__(
462  dataset=pmi_restraint.dataset,
463  assembly=self._get_assembly(densities, simo, state),
464  fitting_method='Gaussian mixture models',
465  number_of_gaussians=len(target_ps))
466 
467  # Have our dataset point to that in the original PMI restraint
468  def __set_dataset(self, val):
469  self.pmi_restraint.dataset = val
470  dataset = property(lambda self: self.pmi_restraint.dataset,
471  __set_dataset)
472 
473  def _get_assembly(self, densities, simo, state):
474  """Get the Assembly that this restraint acts on"""
475  cm = _ComponentMapper(state.prot)
476  components = {}
477  for d in densities:
478  components[cm[d]] = None # None == all residues in this component
479  a = simo._get_subassembly(
480  components, name="EM subassembly",
481  description="All components that fit the EM map")
482  return a
483 
484  def add_fits_from_model_statfile(self, model):
485  ccc = self._get_cross_correlation(model)
486  self.fits[model] = ihm.restraint.EM3DRestraintFit(
487  cross_correlation_coefficient=ccc)
488 
489  def _get_cross_correlation(self, model):
490  """Get the cross correlation coefficient between the model
491  and the map"""
492  if model.stats is not None:
493  return float(model.stats['GaussianEMRestraint_%s_CCC'
494  % self.pmi_restraint.label])
495 
496 
497 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
498 
499  def __init__(self, simo, state, pmi_restraint, geometric_object,
500  feature, distance, sigma):
501  self.pmi_restraint = pmi_restraint
502  super(_GeometricRestraint, self).__init__(
503  dataset=pmi_restraint.dataset,
504  geometric_object=geometric_object, feature=feature,
505  distance=distance, harmonic_force_constant=1. / sigma,
506  restrain_all=True)
507 
508  # Have our dataset point to that in the original PMI restraint
509  def __set_dataset(self, val):
510  self.pmi_restraint.dataset = val
511  dataset = property(lambda self: self.pmi_restraint.dataset,
512  __set_dataset)
513 
514 
515 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
516  def __init__(self, state, rex):
517  if rex.monte_carlo_sample_objects is not None:
518  method = 'Replica exchange monte carlo'
519  else:
520  method = 'Replica exchange molecular dynamics'
521  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
522  self.replica_exchange_minimum_temperature = \
523  rex.vars['replica_exchange_minimum_temperature']
524  self.replica_exchange_maximum_temperature = \
525  rex.vars['replica_exchange_maximum_temperature']
526  super(_ReplicaExchangeProtocolStep, self).__init__(
527  assembly=state.modeled_assembly,
528  dataset_group=None, # filled in by add_step()
529  method=method, name='Sampling',
530  num_models_begin=None, # filled in by add_step()
531  num_models_end=rex.vars["number_of_frames"],
532  multi_scale=True, multi_state=False, ordered=False)
533 
534 
535 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
536  """Write IMP-specific information about replica exchange to mmCIF.
537  Note that IDs will have already been assigned by python-ihm's
538  standard modeling protocol dumper."""
539  def dump(self, system, writer):
540  with writer.loop("_imp_replica_exchange_protocol",
541  ["protocol_id", "step_id", "monte_carlo_temperature",
542  "replica_exchange_minimum_temperature",
543  "replica_exchange_maximum_temperature"]) as lp:
544  for p in system._all_protocols():
545  for s in p.steps:
546  if isinstance(s, _ReplicaExchangeProtocolStep):
547  self._dump_step(p, s, lp)
548 
549  def _dump_step(self, p, s, lp):
550  mintemp = s.replica_exchange_minimum_temperature
551  maxtemp = s.replica_exchange_maximum_temperature
552  lp.write(protocol_id=p._id, step_id=s._id,
553  monte_carlo_temperature=s.monte_carlo_temperature,
554  replica_exchange_minimum_temperature=mintemp,
555  replica_exchange_maximum_temperature=maxtemp)
556 
557 
558 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
559  category = '_imp_replica_exchange_protocol'
560 
561  """Read IMP-specific information about replica exchange from mmCIF."""
562  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
563  replica_exchange_minimum_temperature,
564  replica_exchange_maximum_temperature):
565  p = self.sysr.protocols.get_by_id(protocol_id)
566  # todo: match step_id properly (don't assume contiguous)
567  s = p.steps[int(step_id)-1]
568  # Upgrade from plain ihm Step to IMP subclass
569  s.__class__ = _ReplicaExchangeProtocolStep
570  s.monte_carlo_temperature = \
571  self.get_float(monte_carlo_temperature)
572  s.replica_exchange_minimum_temperature = \
573  self.get_float(replica_exchange_minimum_temperature)
574  s.replica_exchange_maximum_temperature = \
575  self.get_float(replica_exchange_maximum_temperature)
576 
577 
578 class _SimpleProtocolStep(ihm.protocol.Step):
579  def __init__(self, state, num_models_end, method):
580  super(_SimpleProtocolStep, self).__init__(
581  assembly=state.modeled_assembly,
582  dataset_group=None, # filled in by add_step()
583  method=method, name='Sampling',
584  num_models_begin=None, # filled in by add_step()
585  num_models_end=num_models_end,
586  multi_scale=True, multi_state=False, ordered=False)
587 
588 
589 class _Chain(object):
590  """Represent a single chain in a Model"""
591  def __init__(self, pmi_chain_id, asym_unit):
592  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
593  self.spheres = []
594  self.atoms = []
595 
596  def add(self, xyz, atom_type, residue_type, residue_index,
597  all_indexes, radius):
598  if atom_type is None:
599  self.spheres.append((xyz, residue_type, residue_index,
600  all_indexes, radius))
601  else:
602  self.atoms.append((xyz, atom_type, residue_type, residue_index,
603  all_indexes, radius))
604  orig_comp = property(lambda self: self.comp)
605 
606 
607 class _TransformedChain(object):
608  """Represent a chain that is a transformed version of another"""
609  def __init__(self, orig_chain, asym_unit, transform):
610  self.orig_chain, self.asym_unit = orig_chain, asym_unit
611  self.transform = transform
612 
613  def __get_spheres(self):
614  for (xyz, residue_type, residue_index, all_indexes,
615  radius) in self.orig_chain.spheres:
616  yield (self.transform * xyz, residue_type, residue_index,
617  all_indexes, radius)
618  spheres = property(__get_spheres)
619 
620  def __get_atoms(self):
621  for (xyz, atom_type, residue_type, residue_index, all_indexes,
622  radius) in self.orig_chain.atoms:
623  yield (self.transform * xyz, atom_type, residue_type,
624  residue_index, all_indexes, radius)
625  atoms = property(__get_atoms)
626 
627  entity = property(lambda self: self.orig_chain.entity)
628  orig_comp = property(lambda self: self.orig_chain.comp)
629 
630 
631 class _Excluder(object):
632  def __init__(self, component, simo):
633  self._seqranges = simo._exclude_coords.get(component, [])
634 
635  def is_excluded(self, indexes):
636  """Return True iff the given sequence range is excluded."""
637  for seqrange in self._seqranges:
638  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
639  return True
640 
641 
642 class _Model(ihm.model.Model):
643  def __init__(self, prot, simo, protocol, assembly, representation):
644  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
645  representation=representation)
646  self.simo = weakref.proxy(simo)
647  # Transformation from IMP coordinates into mmCIF coordinate system.
648  # Normally we pass through coordinates unchanged, but in some cases
649  # we may want to translate them (e.g. Nup84, where the deposited PDB
650  # files are all centered; we want the mmCIF files to match)
652  self.em2d_stats = None
653  self.stats = None
654  # True iff restraints act on this model
655  self._is_restrained = True
656  o = self.output = IMP.pmi.output.Output(atomistic=True)
657  name = 'cif-output'
658  self.m = prot.get_model()
659  o.dictionary_pdbs[name] = prot
660  o._init_dictchain(name, prot, multichar_chain=True)
661  (particle_infos_for_pdb,
662  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
663  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
664  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
665  self.rmsf = {}
666 
667  def all_chains(self, simo):
668  """Yield all chains, including transformed ones"""
669  chain_for_comp = {}
670  for c in self.chains:
671  yield c
672  chain_for_comp[c.comp] = c
673  for tc in simo._transformed_components:
674  orig_chain = chain_for_comp.get(tc.original, None)
675  if orig_chain:
676  asym = simo.asym_units[tc.name]
677  c = _TransformedChain(orig_chain, asym, tc.transform)
678  c.comp = tc.name
679  yield c
680 
681  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
682  entity_for_chain = {}
683  comp_for_chain = {}
684  correct_asym = {}
685  for protname, chain_id in o.dictchain[name].items():
686  if protname in simo.entities:
687  entity_for_chain[chain_id] = simo.entities[protname]
688  else:
689  # Remove copy number
690  pn = protname.split('.')[0]
691  entity_for_chain[chain_id] = simo.entities[pn]
692  comp_for_chain[chain_id] = protname
693  # When doing multi-state modeling, the chain ID returned here
694  # (assigned sequentially) might not be correct (states may have
695  # gaps in the chain IDs). Map it to the correct asym unit.
696  correct_asym[chain_id] = simo.asym_units[protname]
697 
698  # Gather by chain ID (should be sorted by chain ID already)
699  self.chains = []
700  chain = None
701  excluder = None
702 
703  for (xyz, atom_type, residue_type, chain_id, residue_index,
704  all_indexes, radius) in particle_infos_for_pdb:
705  if chain is None or chain.pmi_chain_id != chain_id:
706  chain = _Chain(chain_id, correct_asym[chain_id])
707  chain.entity = entity_for_chain[chain_id]
708  chain.comp = comp_for_chain[chain_id]
709  self.chains.append(chain)
710  excluder = _Excluder(chain.comp, simo)
711  if not excluder.is_excluded(all_indexes if all_indexes
712  else [residue_index]):
713  chain.add(xyz, atom_type, residue_type, residue_index,
714  all_indexes, radius)
715 
716  def parse_rmsf_file(self, fname, component):
717  self.rmsf[component] = rmsf = {}
718  with open(fname) as fh:
719  for line in fh:
720  resnum, blocknum, val = line.split()
721  rmsf[int(resnum)] = (int(blocknum), float(val))
722 
723  def get_rmsf(self, component, indexes):
724  """Get the RMSF value for the given residue indexes."""
725  if not self.rmsf:
726  return None
727  rmsf = self.rmsf[component]
728  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
729  if len(blocknums) != 1:
730  raise ValueError("Residue indexes %s aren't all in the same block"
731  % str(indexes))
732  return rmsf[indexes[0]][1]
733 
734  def get_atoms(self):
735  for chain in self.all_chains(self.simo):
736  pmi_offset = chain.asym_unit.entity.pmi_offset
737  for atom in chain.atoms:
738  (xyz, atom_type, residue_type, residue_index,
739  all_indexes, radius) = atom
740  pt = self.transform * xyz
741  yield ihm.model.Atom(
742  asym_unit=chain.asym_unit,
743  seq_id=residue_index - pmi_offset,
744  atom_id=atom_type.get_string(),
745  type_symbol=None, # todo: get element
746  x=pt[0], y=pt[1], z=pt[2])
747 
748  def get_spheres(self):
749  for chain in self.all_chains(self.simo):
750  pmi_offset = chain.asym_unit.entity.pmi_offset
751  for sphere in chain.spheres:
752  (xyz, residue_type, residue_index,
753  all_indexes, radius) = sphere
754  if all_indexes is None:
755  all_indexes = (residue_index,)
756  pt = self.transform * xyz
757  yield ihm.model.Sphere(
758  asym_unit=chain.asym_unit,
759  seq_id_range=(all_indexes[0] - pmi_offset,
760  all_indexes[-1] - pmi_offset),
761  x=pt[0], y=pt[1], z=pt[2], radius=radius,
762  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
763 
764 
765 class _AllProtocols(object):
766  def __init__(self, simo):
767  self.simo = weakref.proxy(simo)
768  # Lists of Protocols by state
769  self.protocols = OrderedDict()
770 
771  def add_protocol(self, state):
772  """Add a new Protocol"""
773  if state not in self.protocols:
774  self.protocols[state] = []
775  p = ihm.protocol.Protocol()
776  self.simo.system.orphan_protocols.append(p)
777  self.protocols[state].append(p)
778 
779  def add_step(self, step, state):
780  """Add a ProtocolStep to the last Protocol of the given State"""
781  if state not in self.protocols:
782  self.add_protocol(state)
783  protocol = self.get_last_protocol(state)
784  if len(protocol.steps) == 0:
785  step.num_models_begin = 0
786  else:
787  step.num_models_begin = protocol.steps[-1].num_models_end
788  protocol.steps.append(step)
789  step.id = len(protocol.steps)
790  # Assume that protocol uses all currently-defined datasets
791  step.dataset_group = self.simo.all_datasets.get_all_group(state)
792 
793  def add_postproc(self, step, state):
794  """Add a postprocessing step to the last protocol"""
795  protocol = self.get_last_protocol(state)
796  if len(protocol.analyses) == 0:
797  protocol.analyses.append(ihm.analysis.Analysis())
798  protocol.analyses[-1].steps.append(step)
799 
800  def get_last_protocol(self, state):
801  """Return the most recently-added _Protocol"""
802  return self.protocols[state][-1]
803 
804 
805 class _AllStartingModels(object):
806  def __init__(self, simo):
807  self.simo = simo
808  # dict of starting models (entire PDB files), collected from fragments,
809  # ordered by component name and state
810  self.models = OrderedDict()
811  self.output = IMP.pmi.output.Output()
812 
813  def add_pdb_fragment(self, fragment):
814  """Add a starting model PDB fragment."""
815  comp = fragment.component
816  state = fragment.state
817  if comp not in self.models:
818  self.models[comp] = OrderedDict()
819  if state not in self.models[comp]:
820  self.models[comp][state] = []
821  models = self.models[comp][state]
822  if len(models) == 0 \
823  or models[-1].fragments[0].pdbname != fragment.pdbname:
824  model = self._add_model(fragment)
825  models.append(model)
826  else:
827  # Break circular ref between fragment and model
828  models[-1].fragments.append(weakref.proxy(fragment))
829  # Update residue range to cover all fragments
830  pmi_offset = models[-1].asym_unit.entity.pmi_offset
831  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
832  models[-1].asym_unit.seq_id_range[0])
833  sid_end = max(fragment.end + fragment.offset - pmi_offset,
834  models[-1].asym_unit.seq_id_range[1])
835  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
836  fragment.starting_model = models[-1]
837 
838  def _add_model(self, f):
839  parser = ihm.metadata.PDBParser()
840  r = parser.parse_file(f.pdbname)
841 
842  self.simo._add_dataset(r['dataset'])
843  # We only want the templates that model the starting model chain
844  templates = r['templates'].get(f.chain, [])
845  for t in templates:
846  if t.alignment_file:
847  self.simo.system.locations.append(t.alignment_file)
848  if t.dataset:
849  self.simo._add_dataset(t.dataset)
850  source = r['entity_source'].get(f.chain)
851  if source:
852  f.asym_unit.entity.source = source
853  pmi_offset = f.asym_unit.entity.pmi_offset
854  m = _StartingModel(
855  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
856  dataset=r['dataset'], asym_id=f.chain,
857  templates=templates, offset=f.offset + pmi_offset,
858  metadata=r['metadata'],
859  software=r['software'][0] if r['software'] else None,
860  script_file=r['script'])
861  m.fragments = [weakref.proxy(f)]
862  return m
863 
864 
865 class _StartingModel(ihm.startmodel.StartingModel):
866  def get_seq_dif(self):
867  return self._seq_dif
868 
869  def get_atoms(self):
870  pmi_offset = self.asym_unit.entity.pmi_offset
871  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
872  self.asym_unit)
873  for f in self.fragments:
874  sel = IMP.atom.Selection(
875  f.starting_hier,
876  residue_indexes=list(range(f.start - f.offset,
877  f.end - f.offset + 1)))
878  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
879  f.offset - pmi_offset):
880  yield a
881  self._seq_dif = mh._seq_dif
882 
883 
884 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
885  """Post processing using AnalysisReplicaExchange0 macro"""
886 
887  def __init__(self, rex, num_models_begin):
888  self.rex = rex
889  num_models_end = 0
890  for fname in self.get_all_stat_files():
891  with open(fname) as fh:
892  num_models_end += len(fh.readlines())
893  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
894  feature='RMSD', num_models_begin=num_models_begin,
895  num_models_end=num_models_end)
896 
897  def get_stat_file(self, cluster_num):
898  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
899  'stat.out')
900 
901  def get_all_stat_files(self):
902  for i in range(self.rex._number_of_clusters):
903  yield self.get_stat_file(i)
904 
905 
906 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
907  """Ensemble generated using AnalysisReplicaExchange0 macro"""
908 
909  num_models_deposited = None # Override base class property
910 
911  def __init__(self, pp, cluster_num, model_group, num_deposit):
912  with open(pp.get_stat_file(cluster_num)) as fh:
913  num_models = len(fh.readlines())
914  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
915  num_models=num_models,
916  model_group=model_group, post_process=pp,
917  clustering_feature=pp.feature,
918  name=model_group.name)
919  self.cluster_num = cluster_num
920  self.num_models_deposited = num_deposit
921 
922  def get_rmsf_file(self, component):
923  return os.path.join(self.post_process.rex._outputdir,
924  'cluster.%d' % self.cluster_num,
925  'rmsf.%s.dat' % component)
926 
927  def load_rmsf(self, model, component):
928  fname = self.get_rmsf_file(component)
929  if os.path.exists(fname):
930  model.parse_rmsf_file(fname, component)
931 
932  def get_localization_density_file(self, fname):
933  return os.path.join(self.post_process.rex._outputdir,
934  'cluster.%d' % self.cluster_num,
935  '%s.mrc' % fname)
936 
937  def load_localization_density(self, state, fname, select_tuple,
938  asym_units):
939  fullpath = self.get_localization_density_file(fname)
940  if os.path.exists(fullpath):
941  details = "Localization density for %s %s" \
942  % (fname, self.model_group.name)
943  local_file = ihm.location.OutputFileLocation(fullpath,
944  details=details)
945  for s in select_tuple:
946  if isinstance(s, tuple) and len(s) == 3:
947  asym = asym_units[s[2]].pmi_range(s[0], s[1])
948  else:
949  asym = asym_units[s]
950  den = ihm.model.LocalizationDensity(file=local_file,
951  asym_unit=asym)
952  self.densities.append(den)
953 
954  def load_all_models(self, simo, state):
955  stat_fname = self.post_process.get_stat_file(self.cluster_num)
956  model_num = 0
957  with open(stat_fname) as fh:
958  stats = ast.literal_eval(fh.readline())
959  # Correct path
960  rmf_file = os.path.join(os.path.dirname(stat_fname),
961  "%d.rmf3" % model_num)
962  # todo: test with real PMI2 systems
963  if os.path.exists(rmf_file):
964  rh = RMF.open_rmf_file_read_only(rmf_file)
965  system = state._pmi_object.system
966  IMP.rmf.link_hierarchies(rh, [system.hier])
967  IMP.rmf.load_frame(fh, RMF.FrameID(0))
968  # todo: fill in other data from stat file, e.g. cross-link phi/psi
969  yield stats
970  model_num += 1
971  if model_num >= self.num_models_deposited:
972  return
973 
974  # todo: also support dRMS precision
975  def _get_precision(self):
976  precfile = os.path.join(self.post_process.rex._outputdir,
977  "precision.%d.%d.out" % (self.cluster_num,
978  self.cluster_num))
979  if not os.path.exists(precfile):
980  return ihm.unknown
981  # Fail if the precision.x.x.out file doesn't match the cluster
982  r = re.compile(
983  r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
984  % self.cluster_num)
985  with open(precfile) as fh:
986  for line in fh:
987  m = r.match(line)
988  if m:
989  return float(m.group(1))
990 
991  precision = property(lambda self: self._get_precision(),
992  lambda self, val: None)
993 
994 
995 class _SimpleEnsemble(ihm.model.Ensemble):
996  """Simple manually-created ensemble"""
997 
998  num_models_deposited = None # Override base class property
999 
1000  def __init__(self, pp, model_group, num_models, drmsd,
1001  num_models_deposited, ensemble_file):
1002  super(_SimpleEnsemble, self).__init__(
1003  model_group=model_group, post_process=pp, num_models=num_models,
1004  file=ensemble_file, precision=drmsd, name=model_group.name,
1005  clustering_feature='dRMSD')
1006  self.num_models_deposited = num_models_deposited
1007 
1008  def load_localization_density(self, state, component, asym, local_file):
1009  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
1010  self.densities.append(den)
1011 
1012 
1013 class _CustomDNAAlphabet(object):
1014  """Custom DNA alphabet that maps A,C,G,T (rather than DA,DC,DG,DT
1015  as in python-ihm)"""
1016  _comps = dict([cc.code_canonical, cc]
1017  for cc in ihm.DNAAlphabet._comps.values())
1018 
1019 
1020 class _EntityMapper(dict):
1021  """Handle mapping from IMP components (without copy number) to CIF
1022  entities. Multiple components may map to the same entity if they
1023  share sequence."""
1024  def __init__(self, system):
1025  super(_EntityMapper, self).__init__()
1026  self._sequence_dict = {}
1027  self._entities = []
1028  self.system = system
1029 
1030  def _get_alphabet(self, alphabet):
1031  """Map a PMI alphabet to an IHM alphabet"""
1032  # todo: we should probably ignore alphabets entirely and use
1033  # residue types directly (e.g. we shouldn't compare one-letter
1034  # code sequence to determine if an Entity is unique)
1035  alphabet_map = {None: ihm.LPeptideAlphabet,
1036  IMP.pmi.alphabets.amino_acid: ihm.LPeptideAlphabet,
1037  IMP.pmi.alphabets.rna: ihm.RNAAlphabet,
1038  IMP.pmi.alphabets.dna: _CustomDNAAlphabet}
1039  if alphabet in alphabet_map:
1040  return alphabet_map[alphabet]
1041  else:
1042  raise TypeError("Don't know how to handle %s" % alphabet)
1043 
1044  def add(self, component_name, sequence, offset, alphabet):
1045  def entity_seq(sequence):
1046  # Map X to UNK
1047  if 'X' in sequence:
1048  return ['UNK' if s == 'X' else s for s in sequence]
1049  else:
1050  return sequence
1051  if sequence not in self._sequence_dict:
1052  # Use the name of the first component, stripped of any copy number,
1053  # as the description of the entity
1054  d = component_name.split("@")[0].split(".")[0]
1055  entity = Entity(entity_seq(sequence), description=d,
1056  pmi_offset=offset,
1057  alphabet=self._get_alphabet(alphabet))
1058  self.system.entities.append(entity)
1059  self._sequence_dict[sequence] = entity
1060  self[component_name] = self._sequence_dict[sequence]
1061 
1062 
1063 class _TransformedComponent(object):
1064  def __init__(self, name, original, transform):
1065  self.name, self.original, self.transform = name, original, transform
1066 
1067 
1068 class _SimpleRef(object):
1069  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1070  def __init__(self, ref):
1071  self.ref = ref
1072 
1073  def __call__(self):
1074  return self.ref
1075 
1076 
1077 class _State(ihm.model.State):
1078  """Representation of a single state in the system."""
1079 
1080  def __init__(self, pmi_object, po):
1081  # Point to the PMI object for this state. Use a weak reference
1082  # since the state object typically points to us too, so we need
1083  # to break the reference cycle. In PMI1 this will be a
1084  # Representation object; in PMI2 it is the PMI2 State object itself.
1085  self._pmi_object = weakref.proxy(pmi_object)
1086  if hasattr(pmi_object, 'state'):
1087  # Need a strong ref to pmi_object.state to prevent it from being
1088  # cleaned up when doing PMI1 multistate modeling
1089  self._pmi_state = _SimpleRef(pmi_object.state)
1090  else:
1091  self._pmi_state = weakref.ref(pmi_object)
1092  # Preserve PMI state name
1093  old_name = self.name
1094  super(_State, self).__init__(experiment_type='Fraction of bulk')
1095  self.name = old_name
1096 
1097  # The assembly of all components modeled by IMP in this state.
1098  # This may be smaller than the complete assembly.
1099  self.modeled_assembly = ihm.Assembly(
1100  name="Modeled assembly",
1101  description=self.get_postfixed_name(
1102  "All components modeled by IMP"))
1103  po.system.orphan_assemblies.append(self.modeled_assembly)
1104 
1105  self.all_modeled_components = []
1106 
1107  def __hash__(self):
1108  return hash(self._pmi_state())
1109 
1110  def __eq__(self, other):
1111  return self._pmi_state() == other._pmi_state()
1112 
1113  def add_model_group(self, group):
1114  self.append(group)
1115  return group
1116 
1117  def get_prefixed_name(self, name):
1118  """Prefix the given name with the state name, if available."""
1119  if self.short_name:
1120  return self.short_name + ' ' + name
1121  else:
1122  # Can't use capitalize() since that un-capitalizes everything
1123  # but the first letter
1124  return name[0].upper() + name[1:] if name else ''
1125 
1126  def get_postfixed_name(self, name):
1127  """Postfix the given name with the state name, if available."""
1128  if self.short_name:
1129  return "%s in state %s" % (name, self.short_name)
1130  else:
1131  return name
1132 
1133  short_name = property(lambda self: self._pmi_state().short_name)
1134  long_name = property(lambda self: self._pmi_state().long_name)
1135 
1136  def __get_name(self):
1137  return self._pmi_state().long_name
1138 
1139  def __set_name(self, val):
1140  self._pmi_state().long_name = val
1141 
1142  name = property(__get_name, __set_name)
1143 
1144 
1145 class Entity(ihm.Entity):
1146  """A single entity in the system.
1147  This functions identically to the base ihm.Entity class, but it
1148  allows identifying residues by either the IHM numbering scheme
1149  (seq_id, which is always contiguous starting at 1) or by the PMI
1150  scheme (which is similar, but may not start at 1 if the sequence in
1151  the FASTA file has one or more N-terminal gaps"""
1152  def __init__(self, sequence, pmi_offset, *args, **keys):
1153  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1154  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1155  self.pmi_offset = pmi_offset
1156  super(Entity, self).__init__(sequence, *args, **keys)
1157 
1158  def pmi_residue(self, res_id):
1159  """Return a single IHM residue indexed using PMI numbering"""
1160  return self.residue(res_id - self.pmi_offset)
1161 
1162  def pmi_range(self, res_id_begin, res_id_end):
1163  """Return a range of IHM residues indexed using PMI numbering"""
1164  off = self.pmi_offset
1165  return self(res_id_begin - off, res_id_end - off)
1166 
1167 
1168 class AsymUnit(ihm.AsymUnit):
1169  """A single asymmetric unit in the system.
1170  This functions identically to the base ihm.AsymUnit class, but it
1171  allows identifying residues by either the IHM numbering scheme
1172  (seq_id, which is always contiguous starting at 1) or by the PMI
1173  scheme (which is similar, but may not start at 1 if the sequence in
1174  the FASTA file has one or more N-terminal gaps"""
1175 
1176  def __init__(self, entity, *args, **keys):
1177  super(AsymUnit, self).__init__(
1178  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1179 
1180  def pmi_residue(self, res_id):
1181  """Return a single IHM residue indexed using PMI numbering"""
1182  return self.residue(res_id - self.entity.pmi_offset)
1183 
1184  def pmi_range(self, res_id_begin, res_id_end):
1185  """Return a range of IHM residues indexed using PMI numbering"""
1186  off = self.entity.pmi_offset
1187  return self(res_id_begin - off, res_id_end - off)
1188 
1189 
1191  """Class to encode a modeling protocol as mmCIF.
1192 
1193  IMP has basic support for writing out files in mmCIF format, for
1194  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1195  After creating an instance of this class, attach it to an
1196  IMP.pmi.topology.System object. After this, any
1197  generated models and metadata are automatically collected in the
1198  `system` attribute, which is an
1199  [ihm.System](https://python-ihm.readthedocs.io/en/latest/main.html#ihm.System) object.
1200  Once the protocol is complete, call finalize() to make sure `system`
1201  contains everything, then use the
1202  [python-ihm API](https://python-ihm.readthedocs.io/en/latest/dumper.html#ihm.dumper.write)
1203  to write out files in mmCIF or BinaryCIF format.
1204 
1205  See also get_handlers(), get_dumpers().
1206  """ # noqa: E501
1207  def __init__(self):
1208  # Ultimately, collect data in an ihm.System object
1209  self.system = ihm.System()
1210  self._state_group = ihm.model.StateGroup()
1211  self.system.state_groups.append(self._state_group)
1212 
1213  self._state_ensemble_offset = 0
1214  self._main_script = os.path.abspath(sys.argv[0])
1215 
1216  # Point to the main modeling script
1217  loc = ihm.location.WorkflowFileLocation(
1218  path=self._main_script,
1219  details="The main integrative modeling script")
1220  self.system.locations.append(loc)
1221 
1222  self._states = {}
1223  self.__asym_states = {}
1224  self._working_directory = os.getcwd()
1225  self.default_representation = self.create_representation(
1226  "Default representation")
1227  self.entities = _EntityMapper(self.system)
1228  # Mapping from component names to ihm.AsymUnit
1229  self.asym_units = {}
1230  self._all_components = {}
1231  self.all_modeled_components = []
1232  self._transformed_components = []
1233  self.sequence_dict = {}
1234 
1235  # Common geometric objects used in PMI systems
1236  self._xy_plane = ihm.geometry.XYPlane()
1237  self._xz_plane = ihm.geometry.XZPlane()
1238  self._z_axis = ihm.geometry.ZAxis()
1239  self._center_origin = ihm.geometry.Center(0, 0, 0)
1240  self._identity_transform = ihm.geometry.Transformation.identity()
1241 
1242  # Coordinates to exclude
1243  self._exclude_coords = {}
1244 
1245  self.all_representations = _AllModelRepresentations(self)
1246  self.all_protocols = _AllProtocols(self)
1247  self.all_datasets = _AllDatasets(self.system)
1248  self.all_starting_models = _AllStartingModels(self)
1249 
1250  self.all_software = _AllSoftware(self.system)
1251 
1252  def create_representation(self, name):
1253  """Create a new Representation and return it. This can be
1254  passed to add_model(), add_bead_element() or add_pdb_element()."""
1255  r = ihm.representation.Representation(name=name)
1256  self.system.orphan_representations.append(r)
1257  return r
1258 
1259  def exclude_coordinates(self, component, seqrange):
1260  """Don't record coordinates for the given domain.
1261  Coordinates for the given domain (specified by a component name
1262  and a 2-element tuple giving the start and end residue numbers)
1263  will be excluded from the mmCIF file. This can be used to exclude
1264  parts of the structure that weren't well resolved in modeling.
1265  Any bead or residue that lies wholly within this range will be
1266  excluded. Multiple ranges for a given component can be excluded
1267  by calling this method multiple times."""
1268  if component not in self._exclude_coords:
1269  self._exclude_coords[component] = []
1270  self._exclude_coords[component].append(seqrange)
1271 
1272  def _is_excluded(self, component, start, end):
1273  """Return True iff this chunk of sequence should be excluded"""
1274  for seqrange in self._exclude_coords.get(component, ()):
1275  if start >= seqrange[0] and end <= seqrange[1]:
1276  return True
1277 
1278  def _add_state(self, state):
1279  """Create a new state and return a pointer to it."""
1280  self._state_ensemble_offset = len(self.system.ensembles)
1281  s = _State(state, self)
1282  self._state_group.append(s)
1283  self._last_state = s
1284  return s
1285 
1286  def _get_chain_for_component(self, name, output):
1287  """Get the chain ID for a component, if any."""
1288  # todo: handle multiple copies
1289  if name in self.asym_units:
1290  return self.asym_units[name]._id
1291  else:
1292  # A non-modeled component doesn't have a chain ID
1293  return None
1294 
1295  def _get_assembly_comps(self, assembly):
1296  """Get the names of the components in the given assembly"""
1297  # component name = asym_unit.details
1298  comps = {}
1299  for ca in assembly:
1300  comps[ca.details] = None
1301  return comps
1302 
1303  def create_transformed_component(self, state, name, original, transform):
1304  """Make a new component that's a transformed copy of another.
1305  All representation for the existing component is copied to the
1306  new one."""
1307  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1308  if name in assembly_comps:
1309  raise ValueError("Component %s already exists" % name)
1310  elif original not in assembly_comps:
1311  raise ValueError("Original component %s does not exist" % original)
1312  self.create_component(state, name, True)
1313  self.add_component_sequence(state, name, self.sequence_dict[original])
1314  self._transformed_components.append(_TransformedComponent(
1315  name, original, transform))
1316  self.all_representations.copy_component(state, name, original,
1317  self.asym_units[name])
1318 
1319  def create_component(self, state, name, modeled, asym_name=None):
1320  if asym_name is None:
1321  asym_name = name
1322  new_comp = name not in self._all_components
1323  self._all_components[name] = None
1324  if modeled:
1325  state.all_modeled_components.append(name)
1326  if asym_name not in self.asym_units:
1327  # assign asym once we get sequence
1328  self.asym_units[asym_name] = None
1329  if new_comp:
1330  self.all_modeled_components.append(name)
1331 
1332  def add_component_sequence(self, state, name, seq, asym_name=None,
1333  alphabet=None):
1334  if asym_name is None:
1335  asym_name = name
1336 
1337  def get_offset(seq):
1338  # Count length of gaps at start of sequence
1339  for i in range(len(seq)):
1340  if seq[i] != '-':
1341  return seq[i:], i
1342  seq, offset = get_offset(seq)
1343  if name in self.sequence_dict:
1344  if self.sequence_dict[name] != seq:
1345  raise ValueError("Sequence mismatch for component %s" % name)
1346  else:
1347  self.sequence_dict[name] = seq
1348  self.entities.add(name, seq, offset, alphabet)
1349  if asym_name in self.asym_units:
1350  if self.asym_units[asym_name] is None:
1351  # Set up a new asymmetric unit for this component
1352  entity = self.entities[name]
1353  asym = AsymUnit(entity, details=asym_name)
1354  self.system.asym_units.append(asym)
1355  self.asym_units[asym_name] = asym
1356  state.modeled_assembly.append(self.asym_units[asym_name])
1357 
1358  def _add_restraint_model_fits(self):
1359  """Add fits to restraints for all known models"""
1360  for group, m in self.system._all_models():
1361  if m._is_restrained:
1362  for r in self.system.restraints:
1363  if hasattr(r, 'add_fits_from_model_statfile'):
1364  r.add_fits_from_model_statfile(m)
1365 
1366  def finalize(self):
1367  """Do any final processing on the class hierarchy.
1368  After calling this method, the `system` member (an instance
1369  of `ihm.System`) completely reproduces the PMI modeling, and
1370  can be written out to an mmCIF file with `ihm.dumper.write`,
1371  and/or modified using the ihm API."""
1372  self._add_restraint_model_fits()
1373 
1374  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1375  chain, hier, representation=None):
1376  if self._is_excluded(name, start, end):
1377  return
1378  if representation is None:
1379  representation = self.default_representation
1380  asym = self.asym_units[name]
1381  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1382  hier, asym)
1383  self.all_representations.add_fragment(state, representation, p)
1384  self.all_starting_models.add_pdb_fragment(p)
1385 
1386  def add_bead_element(self, state, name, start, end, num, hier,
1387  representation=None):
1388  if self._is_excluded(name, start, end):
1389  return
1390  if representation is None:
1391  representation = self.default_representation
1392  asym = self.asym_units[name]
1393  pmi_offset = asym.entity.pmi_offset
1394  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1395  num, hier, asym)
1396  self.all_representations.add_fragment(state, representation, b)
1397 
1398  def get_cross_link_group(self, pmi_restraint):
1399  r = _CrossLinkRestraint(pmi_restraint)
1400  self.system.restraints.append(r)
1401  self._add_restraint_dataset(r) # so that all-dataset group works
1402  return r
1403 
1404  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1405  if c1 not in self._all_components or c2 not in self._all_components:
1406  # Crosslink refers to a component we didn't model
1407  # As a quick hack, just ignore it.
1408  # todo: need to add an entity for this anyway (so will need the
1409  # sequence, and add to struct_assembly)
1410  return None
1411  e1 = self.entities[c1]
1412  e2 = self.entities[c2]
1413  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1414  residue2=e2.pmi_residue(r2))
1415  rsr.experimental_cross_links.append([xl])
1416  return xl
1417 
1418  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1419  psi_p, rsr):
1420  # Map p1/p2 to asym units
1421  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1422  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1423 
1424  if _get_by_residue(p1) and _get_by_residue(p2):
1425  cls = _ResidueCrossLink
1426  else:
1427  cls = _FeatureCrossLink
1428  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1429  restrain_all=True)
1430  # Needed to look up fits later
1431  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1432  rsr.cross_links.append(xl)
1433 
1434  def add_replica_exchange(self, state, rex):
1435  # todo: allow for metadata to say how many replicas were used in the
1436  # actual experiment, and how many independent runs were carried out
1437  # (use these as multipliers to get the correct total number of
1438  # output models)
1439  step = _ReplicaExchangeProtocolStep(state, rex)
1440  step.software = self.all_software.pmi
1441  self.all_protocols.add_step(step, state)
1442 
1443  def _add_simple_dynamics(self, num_models_end, method):
1444  # Always assumed that we're dealing with the last state
1445  state = self._last_state
1446  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1447  method), state)
1448 
1449  def _add_protocol(self):
1450  # Always assumed that we're dealing with the last state
1451  state = self._last_state
1452  self.all_protocols.add_protocol(state)
1453 
1454  def _add_dataset(self, dataset):
1455  return self.all_datasets.add(self._last_state, dataset)
1456 
1457  def _add_restraint_dataset(self, restraint):
1458  return self.all_datasets.add_restraint(self._last_state, restraint)
1459 
1460  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1461  # Always assumed that we're dealing with the last state
1462  state = self._last_state
1463  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1464  self.all_protocols.add_postproc(pp, state)
1465  return pp
1466 
1467  def _add_no_postprocessing(self, num_models):
1468  # Always assumed that we're dealing with the last state
1469  state = self._last_state
1470  pp = ihm.analysis.EmptyStep()
1471  pp.num_models_begin = pp.num_models_end = num_models
1472  self.all_protocols.add_postproc(pp, state)
1473  return pp
1474 
1475  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1476  num_models_deposited, localization_densities,
1477  ensemble_file):
1478  """Add an ensemble generated by ad hoc methods (not using PMI).
1479  This is currently only used by the Nup84 system."""
1480  # Always assumed that we're dealing with the last state
1481  state = self._last_state
1482  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1483  state.add_model_group(group)
1484  if ensemble_file:
1485  self.system.locations.append(ensemble_file)
1486  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1487  ensemble_file)
1488  self.system.ensembles.append(e)
1489  for c in state.all_modeled_components:
1490  den = localization_densities.get(c, None)
1491  if den:
1492  e.load_localization_density(state, c, self.asym_units[c], den)
1493  return e
1494 
1495  def set_ensemble_file(self, i, location):
1496  """Point a previously-created ensemble to an 'all-models' file.
1497  This could be a trajectory such as DCD, an RMF, or a multimodel
1498  PDB file."""
1499  self.system.locations.append(location)
1500  # Ensure that we point to an ensemble related to the current state
1501  ind = i + self._state_ensemble_offset
1502  self.system.ensembles[ind].file = location
1503 
1504  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1505  # todo: add prefilter as an additional postprocess step (complication:
1506  # we don't know how many models it filtered)
1507  # todo: postpone rmsf/density localization extraction until after
1508  # relevant methods are called (currently it is done from the
1509  # constructor)
1510  protocol = self.all_protocols.get_last_protocol(state)
1511  num_models = protocol.steps[-1].num_models_end
1512  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1513  pp.software = self.all_software.pmi
1514  self.all_protocols.add_postproc(pp, state)
1515  for i in range(rex._number_of_clusters):
1516  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1517  'cluster %d' % (i + 1)))
1518  state.add_model_group(group)
1519  # todo: make # of models to deposit configurable somewhere
1520  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1521  self.system.ensembles.append(e)
1522  # Add localization density info if available
1523  for fname, stuple in sorted(density_custom_ranges.items()):
1524  e.load_localization_density(state, fname, stuple,
1525  self.asym_units)
1526  for stats in e.load_all_models(self, state):
1527  m = self.add_model(group)
1528  # Since we currently only deposit 1 model, it is the
1529  # best scoring one
1530  m.name = 'Best scoring model'
1531  m.stats = stats
1532  # Add RMSF info if available
1533  for c in state.all_modeled_components:
1534  e.load_rmsf(m, c)
1535 
1536  def _get_subassembly(self, comps, name, description):
1537  """Get an Assembly consisting of the given components.
1538  `compdict` is a dictionary of the components to add, where keys
1539  are the component names and values are the sequence ranges (or
1540  None to use all residues in the component)."""
1541  asyms = []
1542  for comp, seqrng in comps.items():
1543  a = self.asym_units[comp]
1544  asyms.append(a if seqrng is None else a(*seqrng))
1545 
1546  a = ihm.Assembly(asyms, name=name, description=description)
1547  return a
1548 
1549  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1550  details):
1551  """Add a basic FoXS fit. This is largely intended for use from the
1552  NPC application."""
1553  assembly = self._get_subassembly(
1554  {comp: seqrange},
1555  name="SAXS subassembly",
1556  description="All components that fit SAXS data")
1557  r = ihm.restraint.SASRestraint(
1558  dataset, assembly, segment=False,
1559  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1560  multi_state=False, radius_of_gyration=rg, details=details)
1561  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1562  self.system.restraints.append(r)
1563  self._add_restraint_dataset(r) # so that all-dataset group works
1564 
1565  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1566  image_resolution, projection_number,
1567  micrographs_number):
1568  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1569  image_resolution, projection_number,
1570  micrographs_number)
1571  self.system.restraints.append(r)
1572  self._add_restraint_dataset(r) # so that all-dataset group works
1573 
1574  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1575  # todo: need to set allow_duplicates on this dataset?
1576  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1577  self.system.restraints.append(r)
1578  self._add_restraint_dataset(r) # so that all-dataset group works
1579 
1580  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1581  sigma, pmi_restraint):
1582  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1583  sigma, pmi_restraint, self._xy_plane)
1584 
1585  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1586  sigma, pmi_restraint):
1587  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1588  sigma, pmi_restraint, self._xz_plane)
1589 
1590  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1591  sigma, pmi_restraint):
1592  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1593  sigma, pmi_restraint, self._z_axis)
1594 
1595  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1596  sigma, pmi_restraint, geom):
1597  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1598  r = _GeometricRestraint(
1599  self, state, pmi_restraint, geom, asym.get_feature(ps),
1600  ihm.restraint.LowerUpperBoundDistanceRestraint(lower_bound,
1601  upper_bound),
1602  sigma)
1603  self.system.restraints.append(r)
1604  self._add_restraint_dataset(r) # so that all-dataset group works
1605 
1606  def _get_membrane(self, tor_R, tor_r, tor_th):
1607  """Get an object representing a half-torus membrane"""
1608  if not hasattr(self, '_seen_membranes'):
1609  self._seen_membranes = {}
1610  # If a membrane already exists with all dimensions within 0.01 of
1611  # this one, reuse it
1612  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1613  if membrane_id not in self._seen_membranes:
1614  m = ihm.geometry.HalfTorus(
1615  center=self._center_origin,
1616  transformation=self._identity_transform,
1617  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1618  inner=True, name='Membrane')
1619  self._seen_membranes[membrane_id] = m
1620  return self._seen_membranes[membrane_id]
1621 
1622  def add_membrane_surface_location_restraint(
1623  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1624  self._add_membrane_restraint(
1625  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1626  ihm.restraint.UpperBoundDistanceRestraint(0.))
1627 
1628  def add_membrane_exclusion_restraint(
1629  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1630  self._add_membrane_restraint(
1631  state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint,
1632  ihm.restraint.LowerBoundDistanceRestraint(0.))
1633 
1634  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1635  sigma, pmi_restraint, rsr):
1636  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1637  r = _GeometricRestraint(
1638  self, state, pmi_restraint,
1639  self._get_membrane(tor_R, tor_r, tor_th), asym.get_feature(ps),
1640  rsr, sigma)
1641  self.system.restraints.append(r)
1642  self._add_restraint_dataset(r) # so that all-dataset group works
1643 
1644  def add_model(self, group, assembly=None, representation=None):
1645  state = self._last_state
1646  if representation is None:
1647  representation = self.default_representation
1648  protocol = self.all_protocols.get_last_protocol(state)
1649  m = _Model(state.prot, self, protocol,
1650  assembly if assembly else state.modeled_assembly,
1651  representation)
1652  group.append(m)
1653  return m
1654 
1655 
1657  """Get custom python-ihm dumpers for writing PMI to from mmCIF.
1658  This returns a list of custom dumpers that can be passed as all or
1659  part of the `dumpers` argument to ihm.dumper.write(). They add
1660  PMI-specific information to mmCIF or BinaryCIF files written out
1661  by python-ihm."""
1662  return [_ReplicaExchangeProtocolDumper]
1663 
1664 
1666  """Get custom python-ihm handlers for reading PMI data from mmCIF.
1667  This returns a list of custom handlers that can be passed as all or
1668  part of the `handlers` argument to ihm.reader.read(). They read
1669  PMI-specific information from mmCIF or BinaryCIF files read in
1670  by python-ihm."""
1671  return [_ReplicaExchangeProtocolHandler]
1672 
1673 
1674 class GMMParser(ihm.metadata.Parser):
1675  """Extract metadata from an EM density GMM file."""
1676 
1677  def parse_file(self, filename):
1678  """Extract metadata from `filename`.
1679  @return a dict with key `dataset` pointing to the GMM file and
1680  `number_of_gaussians` to the number of GMMs (or None)"""
1681  loc = ihm.location.InputFileLocation(
1682  filename,
1683  details="Electron microscopy density map, "
1684  "represented as a Gaussian Mixture Model (GMM)")
1685  # A 3DEM restraint's dataset ID uniquely defines the mmCIF
1686  # restraint, so we need to allow duplicates
1687  loc._allow_duplicates = True
1688  d = ihm.dataset.EMDensityDataset(loc)
1689  ret = {'dataset': d, 'number_of_gaussians': None}
1690 
1691  with open(filename) as fh:
1692  for line in fh:
1693  if line.startswith('# data_fn: '):
1694  p = ihm.metadata.MRCParser()
1695  fn = line[11:].rstrip('\r\n')
1696  dataset = p.parse_file(os.path.join(
1697  os.path.dirname(filename), fn))['dataset']
1698  ret['dataset'].parents.append(dataset)
1699  elif line.startswith('# ncenters: '):
1700  ret['number_of_gaussians'] = int(line[12:])
1701  return ret
Select non water and non hydrogen atoms.
Definition: pdb.h:245
def get_handlers
Get custom python-ihm handlers for reading PMI data from mmCIF.
Definition: mmcif.py:1665
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1252
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1303
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1262
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1203
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1678
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1162
Miscellaneous utilities.
Definition: tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1674
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:731
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1180
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1172
A single entity in the system.
Definition: mmcif.py:1145
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1495
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1184
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1366
Base class for capturing a modeling protocol.
Definition: output.py:45
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_dumpers
Get custom python-ihm dumpers for writing PMI to from mmCIF.
Definition: mmcif.py:1656
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:200
3D rotation class.
Definition: Rotation3D.h:51
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:135
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1158
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:421
Mapping between FASTA one-letter codes and residue types.
Definition: alphabets.py:1
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...