IMP logo
IMP Reference Guide  2.12.0
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import RMF
15 import IMP.core
16 import IMP.algebra
17 import IMP.atom
18 import IMP.em
19 import IMP.isd
20 import IMP.pmi.tools
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import textwrap
29 import weakref
30 import operator
31 import itertools
32 import ihm.location
33 import ihm.dataset
34 import ihm.dumper
35 import ihm.reader
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81  self._seen_ranges = {}
82 
83  def __getitem__(self, p):
84  protname = self._cm[p]
85  return self.simo.asym_units[protname]
86 
87  def get_feature(self, ps):
88  """Get an ihm.restraint.Feature that covers the given particles"""
89  # todo: handle things other than residues
90  rngs = []
91  for p in ps:
92  asym = self[p]
93  # todo: handle overlapping ranges
95  rind = IMP.atom.Residue(p).get_index()
96  rng = asym(rind, rind)
98  # PMI Fragments always contain contiguous residues
99  rinds = IMP.atom.Fragment(p).get_residue_indexes()
100  rng = asym(rinds[0], rinds[-1])
101  else:
102  raise ValueError("Unsupported particle type %s" % str(p))
103  # Join contiguous ranges
104  if len(rngs) > 0 and rngs[-1].asym == asym \
105  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
107  rng.seq_id_range[1])
108  else:
109  rngs.append(rng)
110  # If an identical feature already exists, return that
111  # todo: python-ihm should handle this automatically for us
112  hrngs = tuple(rngs)
113  if hrngs in self._seen_ranges:
114  return self._seen_ranges[hrngs]
115  else:
116  feat = ihm.restraint.ResidueFeature(rngs)
117  self._seen_ranges[hrngs] = feat
118  return feat
119 
120 
121 class _AllSoftware(object):
122  def __init__(self, system):
123  self.system = system
124  self.modeller_used = self.phyre2_used = False
125  self.pmi = ihm.Software(
126  name="IMP PMI module",
127  version=IMP.pmi.__version__,
128  classification="integrative model building",
129  description="integrative model building",
130  location='https://integrativemodeling.org')
131  self.imp = ihm.Software(
132  name="Integrative Modeling Platform (IMP)",
133  version=IMP.__version__,
134  classification="integrative model building",
135  description="integrative model building",
136  location='https://integrativemodeling.org')
137  self.system.software.extend([self.pmi, self.imp])
138 
139  def set_modeller_used(self, version, date):
140  if self.modeller_used:
141  return
142  self.modeller_used = True
143  self.system.software.append(ihm.Software(
144  name='MODELLER', classification='comparative modeling',
145  description='Comparative modeling by satisfaction '
146  'of spatial restraints, build ' + date,
147  location='https://salilab.org/modeller/',
148  version=version))
149 
150  def set_phyre2_used(self):
151  if self.phyre2_used:
152  return
153  self.phyre2_used = True
154  self.system.software.append(ihm.Software(
155  name='Phyre2', classification='protein homology modeling',
156  description='Protein Homology/analogY Recognition '
157  'Engine V 2.0',
158  version='2.0',
159  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
160 
161 def _get_fragment_is_rigid(fragment):
162  """Determine whether a fragment is modeled rigidly"""
163  leaves = IMP.atom.get_leaves(fragment.hier)
164  # Assume all leaves are set up as rigid/flexible in the same way
165  return IMP.core.RigidMember.get_is_setup(leaves[0])
166 
167 class _PDBFragment(ihm.representation.ResidueSegment):
168  """Record details about part of a PDB file used as input
169  for a component."""
170  def __init__(self, state, component, start, end, pdb_offset,
171  pdbname, chain, hier, asym_unit):
172  # start, end are PMI residue indexes (not IHM)
173  super(_PDBFragment, self).__init__(
174  asym_unit=asym_unit.pmi_range(start, end),
175  rigid=None, primitive='sphere')
176  self.component, self.start, self.end, self.offset, self.pdbname \
177  = component, start, end, pdb_offset, pdbname
178  self.state, self.chain, self.hier = state, chain, hier
181  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
182 
183  rigid = property(lambda self: _get_fragment_is_rigid(self),
184  lambda self, val: None)
185 
186  def combine(self, other):
187  pass
188 
189 
190 class _BeadsFragment(ihm.representation.FeatureSegment):
191  """Record details about beads used to represent part of a component."""
192  chain = None
193  def __init__(self, state, component, start, end, count, hier, asym_unit):
194  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
195  rigid=None, primitive='sphere', count=count)
196  self.state, self.component, self.hier \
197  = state, component, hier
198 
199  rigid = property(lambda self: _get_fragment_is_rigid(self),
200  lambda self, val: None)
201 
202  def combine(self, other):
203  # todo: don't combine if one fragment is rigid and the other flexible
204  if type(other) == type(self) and \
205  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
206  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
207  other.asym_unit.seq_id_range[1])
208  self.count += other.count
209  return True
210 
211 
212 class _AllModelRepresentations(object):
213  def __init__(self, simo):
214  self.simo = simo
215  # dict of fragments, ordered by representation, then component name,
216  # then state
217  self.fragments = OrderedDict()
218  self._all_representations = {}
219 
220  def copy_component(self, state, name, original, asym_unit):
221  """Copy all representation for `original` in `state` to `name`"""
222  def copy_frag(f):
223  newf = copy.copy(f)
224  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
225  return newf
226  for rep in self.fragments:
227  if original in self.fragments[rep]:
228  if name not in self.fragments[rep]:
229  self.fragments[rep][name] = OrderedDict()
230  self.fragments[rep][name][state] = [copy_frag(f)
231  for f in self.fragments[rep][original][state]]
232  # Assume representation for a component is the same in all
233  # states, so only write out the first one
234  first_state = list(self.fragments[rep][name].keys())[0]
235  if state is first_state:
236  representation = self._all_representations[rep]
237  representation.extend(self.fragments[rep][name][state])
238 
239  def add_fragment(self, state, representation, fragment):
240  """Add a model fragment."""
241  comp = fragment.component
242  id_rep = id(representation)
243  self._all_representations[id_rep] = representation
244  if id_rep not in self.fragments:
245  self.fragments[id_rep] = OrderedDict()
246  if comp not in self.fragments[id_rep]:
247  self.fragments[id_rep][comp] = OrderedDict()
248  if state not in self.fragments[id_rep][comp]:
249  self.fragments[id_rep][comp][state] = []
250  fragments = self.fragments[id_rep][comp][state]
251  if len(fragments) == 0 or not fragments[-1].combine(fragment):
252  fragments.append(fragment)
253  # Assume representation for a component is the same in all states,
254  # so only write out the first one
255  first_state = list(self.fragments[id_rep][comp].keys())[0]
256  if state is first_state:
257  representation.append(fragment)
258 
259 
260 class _AllDatasets(object):
261  """Track all datasets generated by PMI and add them to the ihm.System"""
262  def __init__(self, system):
263  self.system = system
264  self._datasets = []
265  self._datasets_by_state = {}
266  self._restraints_by_state = {}
267 
268  def get_all_group(self, state):
269  """Get a DatasetGroup encompassing all datasets so far in this state"""
270  # Note that if a restraint dataset is replaced after this is called,
271  # the group will still contain the old dataset - mark dataset as read
272  # only?
273  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
274  + [r.dataset for r in self._restraints_by_state.get(state, [])
275  if r.dataset])
276  return g
277 
278  def add(self, state, dataset):
279  """Add a new dataset."""
280  self._datasets.append(dataset)
281  if state not in self._datasets_by_state:
282  self._datasets_by_state[state] = []
283  self._datasets_by_state[state].append(dataset)
284  # Make sure that the dataset ends up in the mmCIF file even if nothing
285  # refers to it
286  self.system.orphan_datasets.append(dataset)
287  return dataset
288 
289  def add_restraint(self, state, restraint):
290  """Add the dataset for a restraint"""
291  if state not in self._restraints_by_state:
292  self._restraints_by_state[state] = []
293  self._restraints_by_state[state].append(restraint)
294 
295 
296 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
297  """Restrain to a set of cross links"""
298 
299  assembly = None
300  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
301  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
302  'EDC': ihm.cross_linkers.edc}
303 
304  def __init__(self, pmi_restraint):
305  self.pmi_restraint = pmi_restraint
306  label = self.pmi_restraint.label
307  # Map commonly-used subtypes to more standard labels
308  label = self._label_map.get(label, label)
309  self.label = label
310  super(_CrossLinkRestraint, self).__init__(
311  dataset=self.pmi_restraint.dataset,
312  linker=self._get_chem_descriptor(label))
313 
314  @classmethod
315  def _get_chem_descriptor(cls, label):
316  if label not in cls._descriptor_map:
317  # If label is not a standard linker type, make a new chemical
318  # descriptor containing just the name. We don't know the chemistry
319  # so cannot specify a SMILES or INCHI string for it at this point
320  d = ihm.ChemDescriptor(label)
321  cls._descriptor_map[label] = d
322  return cls._descriptor_map[label]
323 
324  def _set_psi_sigma(self, model):
325  old_values = []
326  # Do nothing if given a different state than the restraint applies to
327  if model.m != self.pmi_restraint.model:
328  return
329  for resolution in self.pmi_restraint.sigma_dictionary:
330  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
331  if model.stats and statname in model.stats:
332  sigma = float(model.stats[statname])
333  p = self.pmi_restraint.sigma_dictionary[resolution][0]
334  old_values.append((p, p.get_scale()))
335  p.set_scale(sigma)
336  for psiindex in self.pmi_restraint.psi_dictionary:
337  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
338  if model.stats and statname in model.stats:
339  psi = float(model.stats[statname])
340  p = self.pmi_restraint.psi_dictionary[psiindex][0]
341  old_values.append((p, p.get_scale()))
342  p.set_scale(psi)
343  # If the same particle is set multiple times, make sure we get the
344  # original value last
345  return list(reversed(old_values))
346 
347  def add_fits_from_model_statfile(self, model):
348  # Set psi/sigma for all particles
349  old_values = self._set_psi_sigma(model)
350  # If no stats were collected, we can't show the fit
351  if not old_values:
352  return
353  for xl in self.cross_links:
354  # Get current psi/sigma particle value for each XL
355  xl.fits[model] = ihm.restraint.CrossLinkFit(
356  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
357  # Restore original psi/sigma
358  for p, scale in old_values:
359  p.set_scale(scale)
360 
361  # Have our dataset point to that in the original PMI restraint
362  def __set_dataset(self, val):
363  self.pmi_restraint.dataset = val
364  dataset = property(lambda self: self.pmi_restraint.dataset,
365  __set_dataset)
366 
367 
368 def get_asym_mapper_for_state(simo, state, asym_map):
369  asym = asym_map.get(state, None)
370  if asym is None:
371  asym = _AsymMapper(simo, state.prot)
372  asym_map[state] = asym
373  return asym
374 
375 class _PMICrossLink(object):
376  # Query PMI particles to get psi and sigma values at time of use, not
377  # time of construction (since they may be set after creating the restraint)
378  psi = property(lambda self: self.psi_p.get_scale(),
379  lambda self, val: None)
380  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
381  lambda self, val: None)
382  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
383  lambda self, val: None)
384 
385 
386 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
387  pass
388 
389 
390 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
391  pass
392 
393 
394 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
395  def __init__(self, state, pmi_restraint, image_number, resolution,
396  pixel_size, image_resolution, projection_number,
397  micrographs_number):
398  self.pmi_restraint, self.image_number = pmi_restraint, image_number
399  super(_EM2DRestraint, self).__init__(
400  dataset=pmi_restraint.datasets[image_number],
401  assembly=state.modeled_assembly,
402  segment=False, number_raw_micrographs=micrographs_number,
403  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
404  image_resolution=image_resolution,
405  number_of_projections=projection_number)
406 
407  # Have our dataset point to that in the original PMI restraint
408  def __get_dataset(self):
409  return self.pmi_restraint.datasets[self.image_number]
410  def __set_dataset(self, val):
411  self.pmi_restraint.datasets[self.image_number] = val
412  dataset = property(__get_dataset, __set_dataset)
413 
414  def add_fits_from_model_statfile(self, model):
415  ccc = self._get_cross_correlation(model)
416  transform = self._get_transformation(model)
417  rot = transform.get_rotation()
418  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
419  self.fits[model] = ihm.restraint.EM2DRestraintFit(
420  cross_correlation_coefficient=ccc,
421  rot_matrix=rm,
422  tr_vector=transform.get_translation())
423 
424  def _get_transformation(self, model):
425  """Get the transformation that places the model on the image"""
426  stats = model.em2d_stats or model.stats
427  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
428  self.image_number + 1)
429  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
430  t = [float(stats[prefix + '_Translation%d' % i])
431  for i in range(3)]
432  # If the model coordinates are transformed, need to back transform
433  # them first
434  inv = model.transform.get_inverse()
436  IMP.algebra.Vector3D(*t)) * inv
437 
438  def _get_cross_correlation(self, model):
439  """Get the cross correlation coefficient between the model projection
440  and the image"""
441  stats = model.em2d_stats or model.stats
442  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
443  % (self.pmi_restraint.label,
444  self.image_number + 1)])
445 
446 
447 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
448 
449  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
450  self.pmi_restraint = pmi_restraint
451  super(_EM3DRestraint, self).__init__(
452  dataset=pmi_restraint.dataset,
453  assembly=self._get_assembly(densities, simo, state),
454  fitting_method='Gaussian mixture models',
455  number_of_gaussians=len(target_ps))
456 
457  # Have our dataset point to that in the original PMI restraint
458  def __set_dataset(self, val):
459  self.pmi_restraint.dataset = val
460  dataset = property(lambda self: self.pmi_restraint.dataset,
461  __set_dataset)
462 
463  def _get_assembly(self, densities, simo, state):
464  """Get the Assembly that this restraint acts on"""
465  cm = _ComponentMapper(state.prot)
466  components = {}
467  for d in densities:
468  components[cm[d]] = None # None == all residues in this component
469  a = simo._get_subassembly(components,
470  name="EM subassembly",
471  description="All components that fit the EM map")
472  return a
473 
474  def add_fits_from_model_statfile(self, model):
475  ccc = self._get_cross_correlation(model)
476  self.fits[model] = ihm.restraint.EM3DRestraintFit(
477  cross_correlation_coefficient=ccc)
478 
479  def _get_cross_correlation(self, model):
480  """Get the cross correlation coefficient between the model
481  and the map"""
482  if model.stats is not None:
483  return float(model.stats['GaussianEMRestraint_%s_CCC'
484  % self.pmi_restraint.label])
485 
486 
487 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
488 
489  def __init__(self, simo, state, pmi_restraint, geometric_object,
490  feature, distance, sigma):
491  self.pmi_restraint = pmi_restraint
492  super(_GeometricRestraint, self).__init__(
493  dataset=pmi_restraint.dataset,
494  geometric_object=geometric_object, feature=feature,
495  distance=distance, harmonic_force_constant = 1. / sigma,
496  restrain_all=True)
497 
498  # Have our dataset point to that in the original PMI restraint
499  def __set_dataset(self, val):
500  self.pmi_restraint.dataset = val
501  dataset = property(lambda self: self.pmi_restraint.dataset,
502  __set_dataset)
503 
504 
505 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
506  def __init__(self, state, rex):
507  if rex.monte_carlo_sample_objects is not None:
508  method = 'Replica exchange monte carlo'
509  else:
510  method = 'Replica exchange molecular dynamics'
511  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
512  self.replica_exchange_minimum_temperature = \
513  rex.vars['replica_exchange_minimum_temperature']
514  self.replica_exchange_maximum_temperature = \
515  rex.vars['replica_exchange_maximum_temperature']
516  super(_ReplicaExchangeProtocolStep, self).__init__(
517  assembly=state.modeled_assembly,
518  dataset_group=None, # filled in by add_step()
519  method=method, name='Sampling',
520  num_models_begin=None, # filled in by add_step()
521  num_models_end=rex.vars["number_of_frames"],
522  multi_scale=True, multi_state=False, ordered=False)
523 
524 
525 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
526  """Write IMP-specific information about replica exchange to mmCIF.
527  Note that IDs will have already been assigned by python-ihm's
528  standard modeling protocol dumper."""
529  def dump(self, system, writer):
530  with writer.loop("_imp_replica_exchange_protocol",
531  ["protocol_id", "step_id", "monte_carlo_temperature",
532  "replica_exchange_minimum_temperature",
533  "replica_exchange_maximum_temperature"]) as l:
534  for p in system._all_protocols():
535  for s in p.steps:
536  if isinstance(s, _ReplicaExchangeProtocolStep):
537  self._dump_step(p, s, l)
538 
539  def _dump_step(self, p, s, l):
540  l.write(protocol_id=p._id, step_id=s._id,
541  monte_carlo_temperature=s.monte_carlo_temperature,
542  replica_exchange_minimum_temperature= \
543  s.replica_exchange_minimum_temperature,
544  replica_exchange_maximum_temperature= \
545  s.replica_exchange_maximum_temperature)
546 
547 
548 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
549  category = '_imp_replica_exchange_protocol'
550 
551  """Read IMP-specific information about replica exchange from mmCIF."""
552  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
553  replica_exchange_minimum_temperature,
554  replica_exchange_maximum_temperature):
555  p = self.sysr.protocols.get_by_id(protocol_id)
556  # todo: match step_id properly (don't assume contiguous)
557  s = p.steps[int(step_id)-1]
558  # Upgrade from plain ihm Step to IMP subclass
559  s.__class__ = _ReplicaExchangeProtocolStep
560  s.monte_carlo_temperature = \
561  self.get_float(monte_carlo_temperature)
562  s.replica_exchange_minimum_temperature = \
563  self.get_float(replica_exchange_minimum_temperature)
564  s.replica_exchange_maximum_temperature = \
565  self.get_float(replica_exchange_maximum_temperature)
566 
567 
568 class _SimpleProtocolStep(ihm.protocol.Step):
569  def __init__(self, state, num_models_end, method):
570  super(_SimpleProtocolStep, self).__init__(
571  assembly=state.modeled_assembly,
572  dataset_group=None, # filled in by add_step()
573  method=method, name='Sampling',
574  num_models_begin=None, # filled in by add_step()
575  num_models_end=num_models_end,
576  multi_scale=True, multi_state=False, ordered=False)
577 
578 
579 class _Chain(object):
580  """Represent a single chain in a Model"""
581  def __init__(self, pmi_chain_id, asym_unit):
582  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
583  self.spheres = []
584  self.atoms = []
585 
586  def add(self, xyz, atom_type, residue_type, residue_index,
587  all_indexes, radius):
588  if atom_type is None:
589  self.spheres.append((xyz, residue_type, residue_index,
590  all_indexes, radius))
591  else:
592  self.atoms.append((xyz, atom_type, residue_type, residue_index,
593  all_indexes, radius))
594  orig_comp = property(lambda self: self.comp)
595 
596 class _TransformedChain(object):
597  """Represent a chain that is a transformed version of another"""
598  def __init__(self, orig_chain, asym_unit, transform):
599  self.orig_chain, self.asym_unit = orig_chain, asym_unit
600  self.transform = transform
601 
602  def __get_spheres(self):
603  for (xyz, residue_type, residue_index, all_indexes,
604  radius) in self.orig_chain.spheres:
605  yield (self.transform * xyz, residue_type, residue_index,
606  all_indexes, radius)
607  spheres = property(__get_spheres)
608 
609  def __get_atoms(self):
610  for (xyz, atom_type, residue_type, residue_index, all_indexes,
611  radius) in self.orig_chain.atoms:
612  yield (self.transform * xyz, atom_type, residue_type, residue_index,
613  all_indexes, radius)
614  atoms = property(__get_atoms)
615 
616  entity = property(lambda self: self.orig_chain.entity)
617  orig_comp = property(lambda self: self.orig_chain.comp)
618 
619 class _Excluder(object):
620  def __init__(self, component, simo):
621  self._seqranges = simo._exclude_coords.get(component, [])
622 
623  def is_excluded(self, indexes):
624  """Return True iff the given sequence range is excluded."""
625  for seqrange in self._seqranges:
626  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
627  return True
628 
629 
630 class _Model(ihm.model.Model):
631  def __init__(self, prot, simo, protocol, assembly, representation):
632  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
633  representation=representation)
634  self.simo = weakref.proxy(simo)
635  # Transformation from IMP coordinates into mmCIF coordinate system.
636  # Normally we pass through coordinates unchanged, but in some cases
637  # we may want to translate them (e.g. Nup84, where the deposited PDB
638  # files are all centered; we want the mmCIF files to match)
640  self.em2d_stats = None
641  self.stats = None
642  # True iff restraints act on this model
643  self._is_restrained = True
644  o = self.output = IMP.pmi.output.Output(atomistic=True)
645  name = 'cif-output'
646  self.m = prot.get_model()
647  o.dictionary_pdbs[name] = prot
648  o._init_dictchain(name, prot, multichar_chain=True)
649  (particle_infos_for_pdb,
650  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
651  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
652  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
653  self.rmsf = {}
654 
655  def all_chains(self, simo):
656  """Yield all chains, including transformed ones"""
657  chain_for_comp = {}
658  for c in self.chains:
659  yield c
660  chain_for_comp[c.comp] = c
661  for tc in simo._transformed_components:
662  orig_chain = chain_for_comp.get(tc.original, None)
663  if orig_chain:
664  asym = simo.asym_units[tc.name]
665  c = _TransformedChain(orig_chain, asym, tc.transform)
666  c.comp = tc.name
667  yield c
668 
669  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
670  entity_for_chain = {}
671  comp_for_chain = {}
672  correct_asym = {}
673  for protname, chain_id in o.dictchain[name].items():
674  if protname in simo.entities:
675  entity_for_chain[chain_id] = simo.entities[protname]
676  else:
677  # Remove copy number
678  pn = protname.split('.')[0]
679  entity_for_chain[chain_id] = simo.entities[pn]
680  comp_for_chain[chain_id] = protname
681  # When doing multi-state modeling, the chain ID returned here
682  # (assigned sequentially) might not be correct (states may have
683  # gaps in the chain IDs). Map it to the correct asym unit.
684  correct_asym[chain_id] = simo.asym_units[protname]
685 
686  # Gather by chain ID (should be sorted by chain ID already)
687  self.chains = []
688  chain = None
689  excluder = None
690 
691  for (xyz, atom_type, residue_type, chain_id, residue_index,
692  all_indexes, radius) in particle_infos_for_pdb:
693  if chain is None or chain.pmi_chain_id != chain_id:
694  chain = _Chain(chain_id, correct_asym[chain_id])
695  chain.entity = entity_for_chain[chain_id]
696  chain.comp = comp_for_chain[chain_id]
697  self.chains.append(chain)
698  excluder = _Excluder(chain.comp, simo)
699  if not excluder.is_excluded(all_indexes if all_indexes
700  else [residue_index]):
701  chain.add(xyz, atom_type, residue_type, residue_index,
702  all_indexes, radius)
703 
704  def parse_rmsf_file(self, fname, component):
705  self.rmsf[component] = rmsf = {}
706  with open(fname) as fh:
707  for line in fh:
708  resnum, blocknum, val = line.split()
709  rmsf[int(resnum)] = (int(blocknum), float(val))
710 
711  def get_rmsf(self, component, indexes):
712  """Get the RMSF value for the given residue indexes."""
713  if not self.rmsf:
714  return None
715  rmsf = self.rmsf[component]
716  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
717  if len(blocknums) != 1:
718  raise ValueError("Residue indexes %s aren't all in the same block"
719  % str(indexes))
720  return rmsf[indexes[0]][1]
721 
722  def get_atoms(self):
723  for chain in self.all_chains(self.simo):
724  pmi_offset = chain.asym_unit.entity.pmi_offset
725  for atom in chain.atoms:
726  (xyz, atom_type, residue_type, residue_index,
727  all_indexes, radius) = atom
728  pt = self.transform * xyz
729  yield ihm.model.Atom(asym_unit=chain.asym_unit,
730  seq_id=residue_index - pmi_offset,
731  atom_id=atom_type.get_string(),
732  type_symbol=None, # todo: get element
733  x=pt[0], y=pt[1], z=pt[2])
734 
735  def get_spheres(self):
736  for chain in self.all_chains(self.simo):
737  pmi_offset = chain.asym_unit.entity.pmi_offset
738  for sphere in chain.spheres:
739  (xyz, residue_type, residue_index,
740  all_indexes, radius) = sphere
741  if all_indexes is None:
742  all_indexes = (residue_index,)
743  pt = self.transform * xyz
744  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
745  seq_id_range=(all_indexes[0] - pmi_offset,
746  all_indexes[-1] - pmi_offset),
747  x=pt[0], y=pt[1], z=pt[2], radius=radius,
748  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
749 
750 
751 class _AllProtocols(object):
752  def __init__(self, simo):
753  self.simo = weakref.proxy(simo)
754  # Lists of Protocols by state
755  self.protocols = OrderedDict()
756 
757  def add_protocol(self, state):
758  """Add a new Protocol"""
759  if state not in self.protocols:
760  self.protocols[state] = []
761  p = ihm.protocol.Protocol()
762  self.simo.system.orphan_protocols.append(p)
763  self.protocols[state].append(p)
764 
765  def add_step(self, step, state):
766  """Add a ProtocolStep to the last Protocol of the given State"""
767  if state not in self.protocols:
768  self.add_protocol(state)
769  protocol = self.get_last_protocol(state)
770  if len(protocol.steps) == 0:
771  step.num_models_begin = 0
772  else:
773  step.num_models_begin = protocol.steps[-1].num_models_end
774  protocol.steps.append(step)
775  step.id = len(protocol.steps)
776  # Assume that protocol uses all currently-defined datasets
777  step.dataset_group = self.simo.all_datasets.get_all_group(state)
778 
779  def add_postproc(self, step, state):
780  """Add a postprocessing step to the last protocol"""
781  protocol = self.get_last_protocol(state)
782  if len(protocol.analyses) == 0:
783  protocol.analyses.append(ihm.analysis.Analysis())
784  protocol.analyses[-1].steps.append(step)
785 
786  def get_last_protocol(self, state):
787  """Return the most recently-added _Protocol"""
788  return self.protocols[state][-1]
789 
790 
791 class _AllStartingModels(object):
792  def __init__(self, simo):
793  self.simo = simo
794  # dict of starting models (entire PDB files), collected from fragments,
795  # ordered by component name and state
796  self.models = OrderedDict()
797  self.output = IMP.pmi.output.Output()
798 
799  def add_pdb_fragment(self, fragment):
800  """Add a starting model PDB fragment."""
801  comp = fragment.component
802  state = fragment.state
803  if comp not in self.models:
804  self.models[comp] = OrderedDict()
805  if state not in self.models[comp]:
806  self.models[comp][state] = []
807  models = self.models[comp][state]
808  if len(models) == 0 \
809  or models[-1].fragments[0].pdbname != fragment.pdbname:
810  model = self._add_model(fragment)
811  models.append(model)
812  else:
813  # Break circular ref between fragment and model
814  models[-1].fragments.append(weakref.proxy(fragment))
815  # Update residue range to cover all fragments
816  pmi_offset = models[-1].asym_unit.entity.pmi_offset
817  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
818  models[-1].asym_unit.seq_id_range[0])
819  sid_end = max(fragment.end + fragment.offset - pmi_offset,
820  models[-1].asym_unit.seq_id_range[1])
821  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
822  fragment.starting_model = models[-1]
823 
824  def _add_model(self, f):
825  parser = ihm.metadata.PDBParser()
826  r = parser.parse_file(f.pdbname)
827 
828  self.simo._add_dataset(r['dataset'])
829  # We only want the templates that model the starting model chain
830  templates = r['templates'].get(f.chain, [])
831  for t in templates:
832  if t.alignment_file:
833  self.simo.system.locations.append(t.alignment_file)
834  if t.dataset:
835  self.simo._add_dataset(t.dataset)
836  source = r['entity_source'].get(f.chain)
837  if source:
838  f.asym_unit.entity.source = source
839  pmi_offset = f.asym_unit.entity.pmi_offset
840  m = _StartingModel(
841  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
842  dataset=r['dataset'], asym_id=f.chain,
843  templates=templates, offset=f.offset + pmi_offset,
844  metadata=r['metadata'],
845  software=r['software'][0] if r['software'] else None,
846  script_file=r['script'])
847  m.fragments = [weakref.proxy(f)]
848  return m
849 
850 
851 class _StartingModel(ihm.startmodel.StartingModel):
852  def get_seq_dif(self):
853  return self._seq_dif
854 
855  def get_atoms(self):
856  pmi_offset = self.asym_unit.entity.pmi_offset
857  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
858  self.asym_unit)
859  for f in self.fragments:
860  sel = IMP.atom.Selection(f.starting_hier,
861  residue_indexes=list(range(f.start - f.offset,
862  f.end - f.offset + 1)))
863  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
864  f.offset - pmi_offset):
865  yield a
866  self._seq_dif = mh._seq_dif
867 
868 
869 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
870  """Post processing using AnalysisReplicaExchange0 macro"""
871 
872  def __init__(self, rex, num_models_begin):
873  self.rex = rex
874  num_models_end = 0
875  for fname in self.get_all_stat_files():
876  with open(fname) as fh:
877  num_models_end += len(fh.readlines())
878  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
879  feature='RMSD', num_models_begin=num_models_begin,
880  num_models_end=num_models_end)
881 
882  def get_stat_file(self, cluster_num):
883  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
884  'stat.out')
885 
886  def get_all_stat_files(self):
887  for i in range(self.rex._number_of_clusters):
888  yield self.get_stat_file(i)
889 
890 
891 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
892  """Ensemble generated using AnalysisReplicaExchange0 macro"""
893 
894  num_models_deposited = None # Override base class property
895 
896  def __init__(self, pp, cluster_num, model_group, num_deposit):
897  with open(pp.get_stat_file(cluster_num)) as fh:
898  num_models = len(fh.readlines())
899  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
900  num_models=num_models,
901  model_group=model_group, post_process=pp,
902  clustering_feature=pp.feature,
903  name=model_group.name)
904  self.cluster_num = cluster_num
905  self.num_models_deposited = num_deposit
906 
907  def get_rmsf_file(self, component):
908  return os.path.join(self.post_process.rex._outputdir,
909  'cluster.%d' % self.cluster_num,
910  'rmsf.%s.dat' % component)
911 
912  def load_rmsf(self, model, component):
913  fname = self.get_rmsf_file(component)
914  if os.path.exists(fname):
915  model.parse_rmsf_file(fname, component)
916 
917  def get_localization_density_file(self, fname):
918  return os.path.join(self.post_process.rex._outputdir,
919  'cluster.%d' % self.cluster_num,
920  '%s.mrc' % fname)
921 
922  def load_localization_density(self, state, fname, select_tuple, asym_units):
923  fullpath = self.get_localization_density_file(fname)
924  if os.path.exists(fullpath):
925  details = "Localization density for %s %s" \
926  % (fname, self.model_group.name)
927  local_file = ihm.location.OutputFileLocation(fullpath,
928  details=details)
929  for s in select_tuple:
930  if isinstance(s, tuple) and len(s) == 3:
931  asym = asym_units[s[2]].pmi_range(s[0], s[1])
932  else:
933  asym = asym_units[s]
934  den = ihm.model.LocalizationDensity(file=local_file,
935  asym_unit=asym)
936  self.densities.append(den)
937 
938  def load_all_models(self, simo, state):
939  stat_fname = self.post_process.get_stat_file(self.cluster_num)
940  model_num = 0
941  with open(stat_fname) as fh:
942  stats = ast.literal_eval(fh.readline())
943  # Correct path
944  rmf_file = os.path.join(os.path.dirname(stat_fname),
945  "%d.rmf3" % model_num)
946  # todo: test with real PMI2 systems
947  if os.path.exists(rmf_file):
948  rh = RMF.open_rmf_file_read_only(rmf_file)
949  system = state._pmi_object.system
950  IMP.rmf.link_hierarchies(rh, [system.hier])
951  IMP.rmf.load_frame(fh, RMF.FrameID(0))
952  # todo: fill in other data from stat file, e.g. crosslink phi/psi
953  yield stats
954  model_num += 1
955  if model_num >= self.num_models_deposited:
956  return
957 
958  # todo: also support dRMS precision
959  def _get_precision(self):
960  precfile = os.path.join(self.post_process.rex._outputdir,
961  "precision.%d.%d.out" % (self.cluster_num,
962  self.cluster_num))
963  if not os.path.exists(precfile):
964  return ihm.unknown
965  # Fail if the precision.x.x.out file doesn't match the cluster
966  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
967  % self.cluster_num)
968  with open(precfile) as fh:
969  for line in fh:
970  m = r.match(line)
971  if m:
972  return float(m.group(1))
973 
974  precision = property(lambda self: self._get_precision(),
975  lambda self, val: None)
976 
977 class _SimpleEnsemble(ihm.model.Ensemble):
978  """Simple manually-created ensemble"""
979 
980  num_models_deposited = None # Override base class property
981 
982  def __init__(self, pp, model_group, num_models, drmsd,
983  num_models_deposited, ensemble_file):
984  super(_SimpleEnsemble, self).__init__(
985  model_group=model_group, post_process=pp, num_models=num_models,
986  file=ensemble_file, precision=drmsd, name=model_group.name,
987  clustering_feature='dRMSD')
988  self.num_models_deposited = num_models_deposited
989 
990  def load_localization_density(self, state, component, asym, local_file):
991  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
992  self.densities.append(den)
993 
994 
995 class _EntityMapper(dict):
996  """Handle mapping from IMP components (without copy number) to CIF entities.
997  Multiple components may map to the same entity if they share sequence."""
998  def __init__(self, system):
999  super(_EntityMapper, self).__init__()
1000  self._sequence_dict = {}
1001  self._entities = []
1002  self.system = system
1003 
1004  def add(self, component_name, sequence, offset):
1005  def entity_seq(sequence):
1006  # Map X to UNK
1007  if 'X' in sequence:
1008  return ['UNK' if s == 'X' else s for s in sequence]
1009  else:
1010  return sequence
1011  if sequence not in self._sequence_dict:
1012  # Use the name of the first component, stripped of any copy number,
1013  # as the description of the entity
1014  d = component_name.split("@")[0].split(".")[0]
1015  entity = Entity(entity_seq(sequence), description=d,
1016  pmi_offset=offset)
1017  self.system.entities.append(entity)
1018  self._sequence_dict[sequence] = entity
1019  self[component_name] = self._sequence_dict[sequence]
1020 
1021 
1022 class _TransformedComponent(object):
1023  def __init__(self, name, original, transform):
1024  self.name, self.original, self.transform = name, original, transform
1025 
1026 
1027 class _SimpleRef(object):
1028  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1029  def __init__(self, ref):
1030  self.ref = ref
1031  def __call__(self):
1032  return self.ref
1033 
1034 
1035 class _State(ihm.model.State):
1036  """Representation of a single state in the system."""
1037 
1038  def __init__(self, pmi_object, po):
1039  # Point to the PMI object for this state. Use a weak reference
1040  # since the state object typically points to us too, so we need
1041  # to break the reference cycle. In PMI1 this will be a
1042  # Representation object; in PMI2 it is the PMI2 State object itself.
1043  self._pmi_object = weakref.proxy(pmi_object)
1044  if hasattr(pmi_object, 'state'):
1045  # Need a strong ref to pmi_object.state to prevent it from being
1046  # cleaned up when doing PMI1 multistate modeling
1047  self._pmi_state = _SimpleRef(pmi_object.state)
1048  else:
1049  self._pmi_state = weakref.ref(pmi_object)
1050  # Preserve PMI state name
1051  old_name = self.name
1052  super(_State, self).__init__(experiment_type='Fraction of bulk')
1053  self.name = old_name
1054 
1055  # The assembly of all components modeled by IMP in this state.
1056  # This may be smaller than the complete assembly.
1057  self.modeled_assembly = ihm.Assembly(
1058  name="Modeled assembly",
1059  description=self.get_postfixed_name(
1060  "All components modeled by IMP"))
1061  po.system.orphan_assemblies.append(self.modeled_assembly)
1062 
1063  self.all_modeled_components = []
1064 
1065  def __hash__(self):
1066  return hash(self._pmi_state())
1067  def __eq__(self, other):
1068  return self._pmi_state() == other._pmi_state()
1069 
1070  def add_model_group(self, group):
1071  self.append(group)
1072  return group
1073 
1074  def get_prefixed_name(self, name):
1075  """Prefix the given name with the state name, if available."""
1076  if self.short_name:
1077  return self.short_name + ' ' + name
1078  else:
1079  # Can't use capitalize() since that un-capitalizes everything
1080  # but the first letter
1081  return name[0].upper() + name[1:] if name else ''
1082 
1083  def get_postfixed_name(self, name):
1084  """Postfix the given name with the state name, if available."""
1085  if self.short_name:
1086  return "%s in state %s" % (name, self.short_name)
1087  else:
1088  return name
1089 
1090  short_name = property(lambda self: self._pmi_state().short_name)
1091  long_name = property(lambda self: self._pmi_state().long_name)
1092  def __get_name(self):
1093  return self._pmi_state().long_name
1094  def __set_name(self, val):
1095  self._pmi_state().long_name = val
1096  name = property(__get_name, __set_name)
1097 
1098 
1099 class Entity(ihm.Entity):
1100  """A single entity in the system.
1101  This functions identically to the base ihm.Entity class, but it
1102  allows identifying residues by either the IHM numbering scheme
1103  (seq_id, which is always contiguous starting at 1) or by the PMI
1104  scheme (which is similar, but may not start at 1 if the sequence in
1105  the FASTA file has one or more N-terminal gaps"""
1106  def __init__(self, sequence, pmi_offset, *args, **keys):
1107  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1108  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1109  self.pmi_offset = pmi_offset
1110  super(Entity, self).__init__(sequence, *args, **keys)
1111 
1112  def pmi_residue(self, res_id):
1113  """Return a single IHM residue indexed using PMI numbering"""
1114  return self.residue(res_id - self.pmi_offset)
1115 
1116  def pmi_range(self, res_id_begin, res_id_end):
1117  """Return a range of IHM residues indexed using PMI numbering"""
1118  off = self.pmi_offset
1119  return self(res_id_begin - off, res_id_end - off)
1120 
1121 
1122 class AsymUnit(ihm.AsymUnit):
1123  """A single asymmetric unit in the system.
1124  This functions identically to the base ihm.AsymUnit class, but it
1125  allows identifying residues by either the IHM numbering scheme
1126  (seq_id, which is always contiguous starting at 1) or by the PMI
1127  scheme (which is similar, but may not start at 1 if the sequence in
1128  the FASTA file has one or more N-terminal gaps"""
1129 
1130  def __init__(self, entity, *args, **keys):
1131  super(AsymUnit, self).__init__(
1132  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1133 
1134  def pmi_residue(self, res_id):
1135  """Return a single IHM residue indexed using PMI numbering"""
1136  return self.residue(res_id - self.entity.pmi_offset)
1137 
1138  def pmi_range(self, res_id_begin, res_id_end):
1139  """Return a range of IHM residues indexed using PMI numbering"""
1140  off = self.entity.pmi_offset
1141  return self(res_id_begin - off, res_id_end - off)
1142 
1143 
1145  """Class to encode a modeling protocol as mmCIF.
1146 
1147  IMP has basic support for writing out files in mmCIF format, for
1148  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1149  After creating an instance of this class, attach it to an
1150  IMP.pmi.representation.Representation object. After this, any
1151  generated models and metadata are automatically collected and
1152  output as mmCIF.
1153  """
1154  def __init__(self, fh):
1155  # Ultimately, collect data in an ihm.System object
1156  self.system = ihm.System()
1157  self._state_group = ihm.model.StateGroup()
1158  self.system.state_groups.append(self._state_group)
1159 
1160  self.fh = fh
1161  self._state_ensemble_offset = 0
1162  self._main_script = os.path.abspath(sys.argv[0])
1163 
1164  # Point to the main modeling script
1165  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1166  details="The main integrative modeling script")
1167  self.system.locations.append(loc)
1168 
1169  self._states = {}
1170  self.__asym_states = {}
1171  self._working_directory = os.getcwd()
1172  self.default_representation = self.create_representation(
1173  "Default representation")
1174  self.entities = _EntityMapper(self.system)
1175  # Mapping from component names to ihm.AsymUnit
1176  self.asym_units = {}
1177  self._all_components = {}
1178  self.all_modeled_components = []
1179  self._transformed_components = []
1180  self.sequence_dict = {}
1181 
1182  # Common geometric objects used in PMI systems
1183  self._xy_plane = ihm.geometry.XYPlane()
1184  self._xz_plane = ihm.geometry.XZPlane()
1185  self._z_axis = ihm.geometry.ZAxis()
1186  self._center_origin = ihm.geometry.Center(0,0,0)
1187  self._identity_transform = ihm.geometry.Transformation.identity()
1188 
1189  # Coordinates to exclude
1190  self._exclude_coords = {}
1191 
1192  self.all_representations = _AllModelRepresentations(self)
1193  self.all_protocols = _AllProtocols(self)
1194  self.all_datasets = _AllDatasets(self.system)
1195  self.all_starting_models = _AllStartingModels(self)
1196 
1197  self.all_software = _AllSoftware(self.system)
1198 
1199  def create_representation(self, name):
1200  """Create a new Representation and return it. This can be
1201  passed to add_model(), add_bead_element() or add_pdb_element()."""
1202  r = ihm.representation.Representation(name=name)
1203  self.system.orphan_representations.append(r)
1204  return r
1205 
1206  def exclude_coordinates(self, component, seqrange):
1207  """Don't record coordinates for the given domain.
1208  Coordinates for the given domain (specified by a component name
1209  and a 2-element tuple giving the start and end residue numbers)
1210  will be excluded from the mmCIF file. This can be used to exclude
1211  parts of the structure that weren't well resolved in modeling.
1212  Any bead or residue that lies wholly within this range will be
1213  excluded. Multiple ranges for a given component can be excluded
1214  by calling this method multiple times."""
1215  if component not in self._exclude_coords:
1216  self._exclude_coords[component] = []
1217  self._exclude_coords[component].append(seqrange)
1218 
1219  def _is_excluded(self, component, start, end):
1220  """Return True iff this chunk of sequence should be excluded"""
1221  for seqrange in self._exclude_coords.get(component, ()):
1222  if start >= seqrange[0] and end <= seqrange[1]:
1223  return True
1224 
1225  def _add_state(self, state):
1226  """Create a new state and return a pointer to it."""
1227  self._state_ensemble_offset = len(self.system.ensembles)
1228  s = _State(state, self)
1229  self._state_group.append(s)
1230  self._last_state = s
1231  return s
1232 
1233  def _get_chain_for_component(self, name, output):
1234  """Get the chain ID for a component, if any."""
1235  # todo: handle multiple copies
1236  if name in self.asym_units:
1237  return self.asym_units[name]._id
1238  else:
1239  # A non-modeled component doesn't have a chain ID
1240  return None
1241 
1242  def _get_assembly_comps(self, assembly):
1243  """Get the names of the components in the given assembly"""
1244  # component name = asym_unit.details
1245  comps = {}
1246  for ca in assembly:
1247  comps[ca.details] = None
1248  return comps
1249 
1250  def create_transformed_component(self, state, name, original, transform):
1251  """Make a new component that's a transformed copy of another.
1252  All representation for the existing component is copied to the
1253  new one."""
1254  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1255  if name in assembly_comps:
1256  raise ValueError("Component %s already exists" % name)
1257  elif original not in assembly_comps:
1258  raise ValueError("Original component %s does not exist" % original)
1259  self.create_component(state, name, True)
1260  self.add_component_sequence(state, name, self.sequence_dict[original])
1261  self._transformed_components.append(_TransformedComponent(
1262  name, original, transform))
1263  self.all_representations.copy_component(state, name, original,
1264  self.asym_units[name])
1265 
1266  def create_component(self, state, name, modeled, asym_name=None):
1267  if asym_name is None:
1268  asym_name = name
1269  new_comp = name not in self._all_components
1270  self._all_components[name] = None
1271  if modeled:
1272  state.all_modeled_components.append(name)
1273  if new_comp:
1274  # assign asym once we get sequence
1275  self.asym_units[asym_name] = None
1276  self.all_modeled_components.append(name)
1277 
1278  def add_component_sequence(self, state, name, seq, asym_name=None):
1279  if asym_name is None:
1280  asym_name = name
1281  def get_offset(seq):
1282  # Count length of gaps at start of sequence
1283  for i in range(len(seq)):
1284  if seq[i] != '-':
1285  return seq[i:], i
1286  seq, offset = get_offset(seq)
1287  if name in self.sequence_dict:
1288  if self.sequence_dict[name] != seq:
1289  raise ValueError("Sequence mismatch for component %s" % name)
1290  else:
1291  self.sequence_dict[name] = seq
1292  self.entities.add(name, seq, offset)
1293  if asym_name in self.asym_units:
1294  if self.asym_units[asym_name] is None:
1295  # Set up a new asymmetric unit for this component
1296  entity = self.entities[name]
1297  asym = AsymUnit(entity, details=asym_name)
1298  self.system.asym_units.append(asym)
1299  self.asym_units[asym_name] = asym
1300  state.modeled_assembly.append(self.asym_units[asym_name])
1301 
1302  def _add_restraint_model_fits(self):
1303  """Add fits to restraints for all known models"""
1304  for group, m in self.system._all_models():
1305  if m._is_restrained:
1306  for r in self.system.restraints:
1307  if hasattr(r, 'add_fits_from_model_statfile'):
1308  r.add_fits_from_model_statfile(m)
1309 
1310  def flush(self, format='mmCIF'):
1311  """Write out all information to the file.
1312  Information can be written in any format supported by
1313  the ihm library (typically this is 'mmCIF' or 'BCIF')."""
1314  self.finalize()
1315  ihm.dumper.write(self.fh, [self.system], format,
1316  dumpers=[_ReplicaExchangeProtocolDumper])
1317 
1318  def finalize(self):
1319  """Do any final processing on the class hierarchy.
1320  After calling this method, the `system` member (an instance
1321  of `ihm.System`) completely reproduces the PMI modeling, and
1322  can be written out to an mmCIF file with `ihm.dumper.write`,
1323  and/or modified using the ihm API."""
1324  self._add_restraint_model_fits()
1325 
1326  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1327  chain, hier, representation=None):
1328  if self._is_excluded(name, start, end):
1329  return
1330  if representation is None:
1331  representation = self.default_representation
1332  asym = self.asym_units[name]
1333  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1334  hier, asym)
1335  self.all_representations.add_fragment(state, representation, p)
1336  self.all_starting_models.add_pdb_fragment(p)
1337 
1338  def add_bead_element(self, state, name, start, end, num, hier,
1339  representation=None):
1340  if self._is_excluded(name, start, end):
1341  return
1342  if representation is None:
1343  representation = self.default_representation
1344  asym = self.asym_units[name]
1345  pmi_offset = asym.entity.pmi_offset
1346  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1347  num, hier, asym)
1348  self.all_representations.add_fragment(state, representation, b)
1349 
1350  def get_cross_link_group(self, pmi_restraint):
1351  r = _CrossLinkRestraint(pmi_restraint)
1352  self.system.restraints.append(r)
1353  self._add_restraint_dataset(r) # so that all-dataset group works
1354  return r
1355 
1356  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1357  if c1 not in self._all_components or c2 not in self._all_components:
1358  # Crosslink refers to a component we didn't model
1359  # As a quick hack, just ignore it.
1360  # todo: need to add an entity for this anyway (so will need the
1361  # sequence, and add to struct_assembly)
1362  return None
1363  e1 = self.entities[c1]
1364  e2 = self.entities[c2]
1365  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1366  residue2=e2.pmi_residue(r2))
1367  rsr.experimental_cross_links.append([xl])
1368  return xl
1369 
1370  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1371  psi_p, rsr):
1372  # Map p1/p2 to asym units
1373  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1374  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1375 
1376  if _get_by_residue(p1) and _get_by_residue(p2):
1377  cls = _ResidueCrossLink
1378  else:
1379  cls = _FeatureCrossLink
1380  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1381  restrain_all=True)
1382  # Needed to look up fits later
1383  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1384  rsr.cross_links.append(xl)
1385 
1386  def add_replica_exchange(self, state, rex):
1387  # todo: allow for metadata to say how many replicas were used in the
1388  # actual experiment, and how many independent runs were carried out
1389  # (use these as multipliers to get the correct total number of
1390  # output models)
1391  step = _ReplicaExchangeProtocolStep(state, rex)
1392  step.software = self.all_software.pmi
1393  self.all_protocols.add_step(step, state)
1394 
1395  def _add_simple_dynamics(self, num_models_end, method):
1396  # Always assumed that we're dealing with the last state
1397  state = self._last_state
1398  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1399  method), state)
1400 
1401  def _add_protocol(self):
1402  # Always assumed that we're dealing with the last state
1403  state = self._last_state
1404  self.all_protocols.add_protocol(state)
1405 
1406  def _add_dataset(self, dataset):
1407  return self.all_datasets.add(self._last_state, dataset)
1408 
1409  def _add_restraint_dataset(self, restraint):
1410  return self.all_datasets.add_restraint(self._last_state, restraint)
1411 
1412  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1413  # Always assumed that we're dealing with the last state
1414  state = self._last_state
1415  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1416  self.all_protocols.add_postproc(pp, state)
1417  return pp
1418 
1419  def _add_no_postprocessing(self, num_models):
1420  # Always assumed that we're dealing with the last state
1421  state = self._last_state
1422  pp = ihm.analysis.EmptyStep()
1423  pp.num_models_begin = pp.num_models_end = num_models
1424  self.all_protocols.add_postproc(pp, state)
1425  return pp
1426 
1427  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1428  num_models_deposited, localization_densities,
1429  ensemble_file):
1430  """Add an ensemble generated by ad hoc methods (not using PMI).
1431  This is currently only used by the Nup84 system."""
1432  # Always assumed that we're dealing with the last state
1433  state = self._last_state
1434  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1435  state.add_model_group(group)
1436  if ensemble_file:
1437  self.system.locations.append(ensemble_file)
1438  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1439  ensemble_file)
1440  self.system.ensembles.append(e)
1441  for c in state.all_modeled_components:
1442  den = localization_densities.get(c, None)
1443  if den:
1444  e.load_localization_density(state, c, self.asym_units[c], den)
1445  return e
1446 
1447  def set_ensemble_file(self, i, location):
1448  """Point a previously-created ensemble to an 'all-models' file.
1449  This could be a trajectory such as DCD, an RMF, or a multimodel
1450  PDB file."""
1451  self.system.locations.append(location)
1452  # Ensure that we point to an ensemble related to the current state
1453  ind = i + self._state_ensemble_offset
1454  self.system.ensembles[ind].file = location
1455 
1456  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1457  # todo: add prefilter as an additional postprocess step (complication:
1458  # we don't know how many models it filtered)
1459  # todo: postpone rmsf/density localization extraction until after
1460  # relevant methods are called (currently it is done from the
1461  # constructor)
1462  protocol = self.all_protocols.get_last_protocol(state)
1463  num_models = protocol.steps[-1].num_models_end
1464  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1465  pp.software = self.all_software.pmi
1466  self.all_protocols.add_postproc(pp, state)
1467  for i in range(rex._number_of_clusters):
1468  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1469  'cluster %d' % (i + 1)))
1470  state.add_model_group(group)
1471  # todo: make # of models to deposit configurable somewhere
1472  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1473  self.system.ensembles.append(e)
1474  # Add localization density info if available
1475  for fname, stuple in sorted(density_custom_ranges.items()):
1476  e.load_localization_density(state, fname, stuple,
1477  self.asym_units)
1478  for stats in e.load_all_models(self, state):
1479  m = self.add_model(group)
1480  # Since we currently only deposit 1 model, it is the
1481  # best scoring one
1482  m.name = 'Best scoring model'
1483  m.stats = stats
1484  # Add RMSF info if available
1485  for c in state.all_modeled_components:
1486  e.load_rmsf(m, c)
1487 
1488  def _get_subassembly(self, comps, name, description):
1489  """Get an Assembly consisting of the given components.
1490  `compdict` is a dictionary of the components to add, where keys
1491  are the component names and values are the sequence ranges (or
1492  None to use all residues in the component)."""
1493  asyms = []
1494  for comp, seqrng in comps.items():
1495  a = self.asym_units[comp]
1496  asyms.append(a if seqrng is None else a(*seqrng))
1497 
1498  a = ihm.Assembly(asyms, name=name, description=description)
1499  return a
1500 
1501  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1502  details):
1503  """Add a basic FoXS fit. This is largely intended for use from the
1504  NPC application."""
1505  assembly = self._get_subassembly({comp:seqrange},
1506  name="SAXS subassembly",
1507  description="All components that fit SAXS data")
1508  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1509  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1510  multi_state=False, radius_of_gyration=rg, details=details)
1511  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1512  self.system.restraints.append(r)
1513  self._add_restraint_dataset(r) # so that all-dataset group works
1514 
1515  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1516  image_resolution, projection_number,
1517  micrographs_number):
1518  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1519  image_resolution, projection_number,
1520  micrographs_number)
1521  self.system.restraints.append(r)
1522  self._add_restraint_dataset(r) # so that all-dataset group works
1523 
1524  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1525  # todo: need to set allow_duplicates on this dataset?
1526  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1527  self.system.restraints.append(r)
1528  self._add_restraint_dataset(r) # so that all-dataset group works
1529 
1530  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1531  sigma, pmi_restraint):
1532  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1533  sigma, pmi_restraint, self._xy_plane)
1534 
1535  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1536  sigma, pmi_restraint):
1537  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1538  sigma, pmi_restraint, self._xz_plane)
1539 
1540  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1541  sigma, pmi_restraint):
1542  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1543  sigma, pmi_restraint, self._z_axis)
1544 
1545  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1546  sigma, pmi_restraint, geom):
1547  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1548  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1549  asym.get_feature(ps),
1550  ihm.restraint.LowerUpperBoundDistanceRestraint(
1551  lower_bound, upper_bound),
1552  sigma)
1553  self.system.restraints.append(r)
1554  self._add_restraint_dataset(r) # so that all-dataset group works
1555 
1556  def _get_membrane(self, tor_R, tor_r, tor_th):
1557  """Get an object representing a half-torus membrane"""
1558  if not hasattr(self, '_seen_membranes'):
1559  self._seen_membranes = {}
1560  # If a membrane already exists with all dimensions within 0.01 of
1561  # this one, reuse it
1562  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1563  if membrane_id not in self._seen_membranes:
1564  m = ihm.geometry.HalfTorus(center=self._center_origin,
1565  transformation=self._identity_transform,
1566  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1567  inner=True, name='Membrane')
1568  self._seen_membranes[membrane_id] = m
1569  return self._seen_membranes[membrane_id]
1570 
1571  def add_membrane_surface_location_restraint(
1572  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1573  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1574  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1575 
1576  def add_membrane_exclusion_restraint(
1577  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1578  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1579  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1580 
1581  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1582  sigma, pmi_restraint, rsr):
1583  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1584  r = _GeometricRestraint(self, state, pmi_restraint,
1585  self._get_membrane(tor_R, tor_r, tor_th),
1586  asym.get_feature(ps), rsr, sigma)
1587  self.system.restraints.append(r)
1588  self._add_restraint_dataset(r) # so that all-dataset group works
1589 
1590  def add_model(self, group, assembly=None, representation=None):
1591  state = self._last_state
1592  if representation is None:
1593  representation = self.default_representation
1594  protocol = self.all_protocols.get_last_protocol(state)
1595  m = _Model(state.prot, self, protocol,
1596  assembly if assembly else state.modeled_assembly,
1597  representation)
1598  group.append(m)
1599  return m
1600 
1601 
1602 def read(fh, model_class=ihm.model.Model, format='mmCIF', handlers=[]):
1603  """Read data from the mmCIF file handle `fh`.
1604  This is a simple wrapper around `ihm.reader.read()`, which also
1605  reads PMI-specific information from the mmCIF or BinaryCIF file."""
1606  return ihm.reader.read(fh, model_class=model_class, format=format,
1607  handlers=[_ReplicaExchangeProtocolHandler]+handlers)
1608 
1609 
1610 class GMMParser(ihm.metadata.Parser):
1611  """Extract metadata from an EM density GMM file."""
1612 
1613  def parse_file(self, filename):
1614  """Extract metadata from `filename`.
1615  @return a dict with key `dataset` pointing to the GMM file and
1616  `number_of_gaussians` to the number of GMMs (or None)"""
1617  l = ihm.location.InputFileLocation(filename,
1618  details="Electron microscopy density map, "
1619  "represented as a Gaussian Mixture Model (GMM)")
1620  # A 3DEM restraint's dataset ID uniquely defines the mmCIF restraint, so
1621  # we need to allow duplicates
1622  l._allow_duplicates = True
1623  d = ihm.dataset.EMDensityDataset(l)
1624  ret = {'dataset':d, 'number_of_gaussians':None}
1625 
1626  with open(filename) as fh:
1627  for line in fh:
1628  if line.startswith('# data_fn: '):
1629  p = ihm.metadata.MRCParser()
1630  fn = line[11:].rstrip('\r\n')
1631  dataset = p.parse_file(os.path.join(
1632  os.path.dirname(filename), fn))['dataset']
1633  ret['dataset'].parents.append(dataset)
1634  elif line.startswith('# ncenters: '):
1635  ret['number_of_gaussians'] = int(line[12:])
1636  return ret
Select non water and non hydrogen atoms.
Definition: pdb.h:245
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1199
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1250
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1209
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1150
def flush
Write out all information to the file.
Definition: mmcif.py:1310
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1614
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1116
Miscellaneous utilities.
Definition: tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1610
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:731
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1134
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1126
A single entity in the system.
Definition: mmcif.py:1099
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1447
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1138
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1318
Base class for capturing a modeling protocol.
Definition: output.py:40
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:60
3D rotation class.
Definition: Rotation3D.h:51
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:135
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1112
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def read
Read data from the mmCIF file handle fh.
Definition: mmcif.py:1602
VectorD< 3 > Vector3D
Definition: VectorD.h:421
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...