IMP logo
IMP Reference Guide  2.11.1
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi.tools
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.mmcif.data
24 import re
25 import ast
26 import sys
27 import os
28 import textwrap
29 import weakref
30 import operator
31 import itertools
32 import ihm.location
33 import ihm.dataset
34 import ihm.dumper
35 import ihm.reader
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81  self._seen_ranges = {}
82 
83  def __getitem__(self, p):
84  protname = self._cm[p]
85  return self.simo.asym_units[protname]
86 
87  def get_feature(self, ps):
88  """Get an ihm.restraint.Feature that covers the given particles"""
89  # todo: handle things other than residues
90  rngs = []
91  for p in ps:
92  asym = self[p]
93  # todo: handle overlapping ranges
95  rind = IMP.atom.Residue(p).get_index()
96  rng = asym(rind, rind)
98  # PMI Fragments always contain contiguous residues
99  rinds = IMP.atom.Fragment(p).get_residue_indexes()
100  rng = asym(rinds[0], rinds[-1])
101  else:
102  raise ValueError("Unsupported particle type %s" % str(p))
103  # Join contiguous ranges
104  if len(rngs) > 0 and rngs[-1].asym == asym \
105  and rngs[-1].seq_id_range[1] == rng.seq_id_range[0] - 1:
106  rngs[-1].seq_id_range = (rngs[-1].seq_id_range[0],
107  rng.seq_id_range[1])
108  else:
109  rngs.append(rng)
110  # If an identical feature already exists, return that
111  # todo: python-ihm should handle this automatically for us
112  hrngs = tuple(rngs)
113  if hrngs in self._seen_ranges:
114  return self._seen_ranges[hrngs]
115  else:
116  feat = ihm.restraint.ResidueFeature(rngs)
117  self._seen_ranges[hrngs] = feat
118  return feat
119 
120 
121 class _AllSoftware(object):
122  def __init__(self, system):
123  self.system = system
124  self.modeller_used = self.phyre2_used = False
125  self.pmi = ihm.Software(
126  name="IMP PMI module",
127  version=IMP.pmi.__version__,
128  classification="integrative model building",
129  description="integrative model building",
130  location='https://integrativemodeling.org')
131  self.imp = ihm.Software(
132  name="Integrative Modeling Platform (IMP)",
133  version=IMP.__version__,
134  classification="integrative model building",
135  description="integrative model building",
136  location='https://integrativemodeling.org')
137  self.system.software.extend([self.pmi, self.imp])
138 
139  def set_modeller_used(self, version, date):
140  if self.modeller_used:
141  return
142  self.modeller_used = True
143  self.system.software.append(ihm.Software(
144  name='MODELLER', classification='comparative modeling',
145  description='Comparative modeling by satisfaction '
146  'of spatial restraints, build ' + date,
147  location='https://salilab.org/modeller/',
148  version=version))
149 
150  def set_phyre2_used(self):
151  if self.phyre2_used:
152  return
153  self.phyre2_used = True
154  self.system.software.append(ihm.Software(
155  name='Phyre2', classification='protein homology modeling',
156  description='Protein Homology/analogY Recognition '
157  'Engine V 2.0',
158  version='2.0',
159  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
160 
161 def _get_fragment_is_rigid(fragment):
162  """Determine whether a fragment is modeled rigidly"""
163  leaves = IMP.atom.get_leaves(fragment.hier)
164  # Assume all leaves are set up as rigid/flexible in the same way
165  return IMP.core.RigidMember.get_is_setup(leaves[0])
166 
167 class _PDBFragment(ihm.representation.ResidueSegment):
168  """Record details about part of a PDB file used as input
169  for a component."""
170  def __init__(self, state, component, start, end, pdb_offset,
171  pdbname, chain, hier, asym_unit):
172  # start, end are PMI residue indexes (not IHM)
173  super(_PDBFragment, self).__init__(
174  asym_unit=asym_unit.pmi_range(start, end),
175  rigid=None, primitive='sphere')
176  self.component, self.start, self.end, self.offset, self.pdbname \
177  = component, start, end, pdb_offset, pdbname
178  self.state, self.chain, self.hier = state, chain, hier
181  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
182 
183  rigid = property(lambda self: _get_fragment_is_rigid(self),
184  lambda self, val: None)
185 
186  def combine(self, other):
187  pass
188 
189 
190 class _BeadsFragment(ihm.representation.FeatureSegment):
191  """Record details about beads used to represent part of a component."""
192  chain = None
193  def __init__(self, state, component, start, end, count, hier, asym_unit):
194  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
195  rigid=None, primitive='sphere', count=count)
196  self.state, self.component, self.hier \
197  = state, component, hier
198 
199  rigid = property(lambda self: _get_fragment_is_rigid(self),
200  lambda self, val: None)
201 
202  def combine(self, other):
203  # todo: don't combine if one fragment is rigid and the other flexible
204  if type(other) == type(self) and \
205  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
206  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
207  other.asym_unit.seq_id_range[1])
208  self.count += other.count
209  return True
210 
211 
212 class _AllModelRepresentations(object):
213  def __init__(self, simo):
214  self.simo = simo
215  # dict of fragments, ordered by representation, then component name,
216  # then state
217  self.fragments = OrderedDict()
218  self._all_representations = {}
219 
220  def copy_component(self, state, name, original, asym_unit):
221  """Copy all representation for `original` in `state` to `name`"""
222  def copy_frag(f):
223  newf = copy.copy(f)
224  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
225  return newf
226  for rep in self.fragments:
227  if original in self.fragments[rep]:
228  if name not in self.fragments[rep]:
229  self.fragments[rep][name] = OrderedDict()
230  self.fragments[rep][name][state] = [copy_frag(f)
231  for f in self.fragments[rep][original][state]]
232  # Assume representation for a component is the same in all
233  # states, so only write out the first one
234  first_state = list(self.fragments[rep][name].keys())[0]
235  if state is first_state:
236  representation = self._all_representations[rep]
237  representation.extend(self.fragments[rep][name][state])
238 
239  def add_fragment(self, state, representation, fragment):
240  """Add a model fragment."""
241  comp = fragment.component
242  id_rep = id(representation)
243  self._all_representations[id_rep] = representation
244  if id_rep not in self.fragments:
245  self.fragments[id_rep] = OrderedDict()
246  if comp not in self.fragments[id_rep]:
247  self.fragments[id_rep][comp] = OrderedDict()
248  if state not in self.fragments[id_rep][comp]:
249  self.fragments[id_rep][comp][state] = []
250  fragments = self.fragments[id_rep][comp][state]
251  if len(fragments) == 0 or not fragments[-1].combine(fragment):
252  fragments.append(fragment)
253  # Assume representation for a component is the same in all states,
254  # so only write out the first one
255  first_state = list(self.fragments[id_rep][comp].keys())[0]
256  if state is first_state:
257  representation.append(fragment)
258 
259 
260 class _AllDatasets(object):
261  """Track all datasets generated by PMI and add them to the ihm.System"""
262  def __init__(self, system):
263  self.system = system
264  self._datasets = []
265  self._datasets_by_state = {}
266  self._restraints_by_state = {}
267 
268  def get_all_group(self, state):
269  """Get a DatasetGroup encompassing all datasets so far in this state"""
270  # Note that if a restraint dataset is replaced after this is called,
271  # the group will still contain the old dataset - mark dataset as read
272  # only?
273  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
274  + [r.dataset for r in self._restraints_by_state.get(state, [])
275  if r.dataset])
276  return g
277 
278  def add(self, state, dataset):
279  """Add a new dataset."""
280  self._datasets.append(dataset)
281  if state not in self._datasets_by_state:
282  self._datasets_by_state[state] = []
283  self._datasets_by_state[state].append(dataset)
284  # Make sure that the dataset ends up in the mmCIF file even if nothing
285  # refers to it
286  self.system.orphan_datasets.append(dataset)
287  return dataset
288 
289  def add_restraint(self, state, restraint):
290  """Add the dataset for a restraint"""
291  if state not in self._restraints_by_state:
292  self._restraints_by_state[state] = []
293  self._restraints_by_state[state].append(restraint)
294 
295 
296 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
297  """Restrain to a set of cross links"""
298 
299  assembly = None
300  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
301  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
302  'EDC': ihm.cross_linkers.edc}
303 
304  def __init__(self, pmi_restraint):
305  self.pmi_restraint = pmi_restraint
306  label = self.pmi_restraint.label
307  # Map commonly-used subtypes to more standard labels
308  label = self._label_map.get(label, label)
309  self.label = label
310  super(_CrossLinkRestraint, self).__init__(
311  dataset=self.pmi_restraint.dataset,
312  linker=self._get_chem_descriptor(label))
313 
314  @classmethod
315  def _get_chem_descriptor(cls, label):
316  if label not in cls._descriptor_map:
317  # If label is not a standard linker type, make a new chemical
318  # descriptor containing just the name. We don't know the chemistry
319  # so cannot specify a SMILES or INCHI string for it at this point
320  d = ihm.ChemDescriptor(label)
321  cls._descriptor_map[label] = d
322  return cls._descriptor_map[label]
323 
324  def _set_psi_sigma(self, model):
325  old_values = []
326  # Do nothing if given a different state than the restraint applies to
327  if model.m != self.pmi_restraint.model:
328  return
329  for resolution in self.pmi_restraint.sigma_dictionary:
330  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
331  if model.stats and statname in model.stats:
332  sigma = float(model.stats[statname])
333  p = self.pmi_restraint.sigma_dictionary[resolution][0]
334  old_values.append((p, p.get_scale()))
335  p.set_scale(sigma)
336  for psiindex in self.pmi_restraint.psi_dictionary:
337  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
338  if model.stats and statname in model.stats:
339  psi = float(model.stats[statname])
340  p = self.pmi_restraint.psi_dictionary[psiindex][0]
341  old_values.append((p, p.get_scale()))
342  p.set_scale(psi)
343  # If the same particle is set multiple times, make sure we get the
344  # original value last
345  return list(reversed(old_values))
346 
347  def add_fits_from_model_statfile(self, model):
348  # Set psi/sigma for all particles
349  old_values = self._set_psi_sigma(model)
350  # If no stats were collected, we can't show the fit
351  if not old_values:
352  return
353  for xl in self.cross_links:
354  # Get current psi/sigma particle value for each XL
355  xl.fits[model] = ihm.restraint.CrossLinkFit(
356  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
357  # Restore original psi/sigma
358  for p, scale in old_values:
359  p.set_scale(scale)
360 
361  # Have our dataset point to that in the original PMI restraint
362  def __set_dataset(self, val):
363  self.pmi_restraint.dataset = val
364  dataset = property(lambda self: self.pmi_restraint.dataset,
365  __set_dataset)
366 
367 
368 def get_asym_mapper_for_state(simo, state, asym_map):
369  asym = asym_map.get(state, None)
370  if asym is None:
371  asym = _AsymMapper(simo, state.prot)
372  asym_map[state] = asym
373  return asym
374 
375 class _PMICrossLink(object):
376  # Query PMI particles to get psi and sigma values at time of use, not
377  # time of construction (since they may be set after creating the restraint)
378  psi = property(lambda self: self.psi_p.get_scale(),
379  lambda self, val: None)
380  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
381  lambda self, val: None)
382  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
383  lambda self, val: None)
384 
385 
386 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
387  pass
388 
389 
390 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
391  pass
392 
393 
394 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
395  def __init__(self, state, pmi_restraint, image_number, resolution,
396  pixel_size, image_resolution, projection_number,
397  micrographs_number):
398  self.pmi_restraint, self.image_number = pmi_restraint, image_number
399  super(_EM2DRestraint, self).__init__(
400  dataset=pmi_restraint.datasets[image_number],
401  assembly=state.modeled_assembly,
402  segment=False, number_raw_micrographs=micrographs_number,
403  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
404  image_resolution=image_resolution,
405  number_of_projections=projection_number)
406 
407  # Have our dataset point to that in the original PMI restraint
408  def __get_dataset(self):
409  return self.pmi_restraint.datasets[self.image_number]
410  def __set_dataset(self, val):
411  self.pmi_restraint.datasets[self.image_number] = val
412  dataset = property(__get_dataset, __set_dataset)
413 
414  def add_fits_from_model_statfile(self, model):
415  ccc = self._get_cross_correlation(model)
416  transform = self._get_transformation(model)
417  rot = transform.get_rotation()
418  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
419  self.fits[model] = ihm.restraint.EM2DRestraintFit(
420  cross_correlation_coefficient=ccc,
421  rot_matrix=rm,
422  tr_vector=transform.get_translation())
423 
424  def _get_transformation(self, model):
425  """Get the transformation that places the model on the image"""
426  stats = model.em2d_stats or model.stats
427  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
428  self.image_number + 1)
429  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
430  t = [float(stats[prefix + '_Translation%d' % i])
431  for i in range(3)]
432  # If the model coordinates are transformed, need to back transform
433  # them first
434  inv = model.transform.get_inverse()
436  IMP.algebra.Vector3D(*t)) * inv
437 
438  def _get_cross_correlation(self, model):
439  """Get the cross correlation coefficient between the model projection
440  and the image"""
441  stats = model.em2d_stats or model.stats
442  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
443  % (self.pmi_restraint.label,
444  self.image_number + 1)])
445 
446 
447 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
448 
449  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
450  self.pmi_restraint = pmi_restraint
451  super(_EM3DRestraint, self).__init__(
452  dataset=pmi_restraint.dataset,
453  assembly=self._get_assembly(densities, simo, state),
454  fitting_method='Gaussian mixture models',
455  number_of_gaussians=len(target_ps))
456 
457  # Have our dataset point to that in the original PMI restraint
458  def __set_dataset(self, val):
459  self.pmi_restraint.dataset = val
460  dataset = property(lambda self: self.pmi_restraint.dataset,
461  __set_dataset)
462 
463  def _get_assembly(self, densities, simo, state):
464  """Get the Assembly that this restraint acts on"""
465  cm = _ComponentMapper(state.prot)
466  components = {}
467  for d in densities:
468  components[cm[d]] = None # None == all residues in this component
469  a = simo._get_subassembly(components,
470  name="EM subassembly",
471  description="All components that fit the EM map")
472  return a
473 
474  def add_fits_from_model_statfile(self, model):
475  ccc = self._get_cross_correlation(model)
476  self.fits[model] = ihm.restraint.EM3DRestraintFit(
477  cross_correlation_coefficient=ccc)
478 
479  def _get_cross_correlation(self, model):
480  """Get the cross correlation coefficient between the model
481  and the map"""
482  if model.stats is not None:
483  return float(model.stats['GaussianEMRestraint_%s_CCC'
484  % self.pmi_restraint.label])
485 
486 
487 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
488 
489  def __init__(self, simo, state, pmi_restraint, geometric_object,
490  feature, distance, sigma):
491  self.pmi_restraint = pmi_restraint
492  super(_GeometricRestraint, self).__init__(
493  dataset=pmi_restraint.dataset,
494  geometric_object=geometric_object, feature=feature,
495  distance=distance, harmonic_force_constant = 1. / sigma,
496  restrain_all=True)
497 
498  # Have our dataset point to that in the original PMI restraint
499  def __set_dataset(self, val):
500  self.pmi_restraint.dataset = val
501  dataset = property(lambda self: self.pmi_restraint.dataset,
502  __set_dataset)
503 
504 
505 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
506  def __init__(self, state, rex):
507  if rex.monte_carlo_sample_objects is not None:
508  method = 'Replica exchange monte carlo'
509  else:
510  method = 'Replica exchange molecular dynamics'
511  self.monte_carlo_temperature = rex.vars['monte_carlo_temperature']
512  self.replica_exchange_minimum_temperature = \
513  rex.vars['replica_exchange_minimum_temperature']
514  self.replica_exchange_maximum_temperature = \
515  rex.vars['replica_exchange_maximum_temperature']
516  super(_ReplicaExchangeProtocolStep, self).__init__(
517  assembly=state.modeled_assembly,
518  dataset_group=None, # filled in by add_step()
519  method=method, name='Sampling',
520  num_models_begin=None, # filled in by add_step()
521  num_models_end=rex.vars["number_of_frames"],
522  multi_scale=True, multi_state=False, ordered=False)
523 
524 
525 class _ReplicaExchangeProtocolDumper(ihm.dumper.Dumper):
526  """Write IMP-specific information about replica exchange to mmCIF.
527  Note that IDs will have already been assigned by python-ihm's
528  standard modeling protocol dumper."""
529  def dump(self, system, writer):
530  with writer.loop("_imp_replica_exchange_protocol",
531  ["protocol_id", "step_id", "monte_carlo_temperature",
532  "replica_exchange_minimum_temperature",
533  "replica_exchange_maximum_temperature"]) as l:
534  for p in system._all_protocols():
535  for s in p.steps:
536  if isinstance(s, _ReplicaExchangeProtocolStep):
537  self._dump_step(p, s, l)
538 
539  def _dump_step(self, p, s, l):
540  l.write(protocol_id=p._id, step_id=s._id,
541  monte_carlo_temperature=s.monte_carlo_temperature,
542  replica_exchange_minimum_temperature= \
543  s.replica_exchange_minimum_temperature,
544  replica_exchange_maximum_temperature= \
545  s.replica_exchange_maximum_temperature)
546 
547 
548 class _ReplicaExchangeProtocolHandler(ihm.reader.Handler):
549  category = '_imp_replica_exchange_protocol'
550 
551  """Read IMP-specific information about replica exchange from mmCIF."""
552  def __call__(self, protocol_id, step_id, monte_carlo_temperature,
553  replica_exchange_minimum_temperature,
554  replica_exchange_maximum_temperature):
555  p = self.sysr.protocols.get_by_id(protocol_id)
556  # todo: match step_id properly (don't assume contiguous)
557  s = p.steps[int(step_id)-1]
558  # Upgrade from plain ihm Step to IMP subclass
559  s.__class__ = _ReplicaExchangeProtocolStep
560  s.monte_carlo_temperature = \
561  self.get_float(monte_carlo_temperature)
562  s.replica_exchange_minimum_temperature = \
563  self.get_float(replica_exchange_minimum_temperature)
564  s.replica_exchange_maximum_temperature = \
565  self.get_float(replica_exchange_maximum_temperature)
566 
567 
568 class _SimpleProtocolStep(ihm.protocol.Step):
569  def __init__(self, state, num_models_end, method):
570  super(_SimpleProtocolStep, self).__init__(
571  assembly=state.modeled_assembly,
572  dataset_group=None, # filled in by add_step()
573  method=method, name='Sampling',
574  num_models_begin=None, # filled in by add_step()
575  num_models_end=num_models_end,
576  multi_scale=True, multi_state=False, ordered=False)
577 
578 
579 class _Chain(object):
580  """Represent a single chain in a Model"""
581  def __init__(self, pmi_chain_id, asym_unit):
582  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
583  self.spheres = []
584  self.atoms = []
585 
586  def add(self, xyz, atom_type, residue_type, residue_index,
587  all_indexes, radius):
588  if atom_type is None:
589  self.spheres.append((xyz, residue_type, residue_index,
590  all_indexes, radius))
591  else:
592  self.atoms.append((xyz, atom_type, residue_type, residue_index,
593  all_indexes, radius))
594  orig_comp = property(lambda self: self.comp)
595 
596 class _TransformedChain(object):
597  """Represent a chain that is a transformed version of another"""
598  def __init__(self, orig_chain, asym_unit, transform):
599  self.orig_chain, self.asym_unit = orig_chain, asym_unit
600  self.transform = transform
601 
602  def __get_spheres(self):
603  for (xyz, residue_type, residue_index, all_indexes,
604  radius) in self.orig_chain.spheres:
605  yield (self.transform * xyz, residue_type, residue_index,
606  all_indexes, radius)
607  spheres = property(__get_spheres)
608 
609  def __get_atoms(self):
610  for (xyz, atom_type, residue_type, residue_index, all_indexes,
611  radius) in self.orig_chain.atoms:
612  yield (self.transform * xyz, atom_type, residue_type, residue_index,
613  all_indexes, radius)
614  atoms = property(__get_atoms)
615 
616  entity = property(lambda self: self.orig_chain.entity)
617  orig_comp = property(lambda self: self.orig_chain.comp)
618 
619 class _Excluder(object):
620  def __init__(self, component, simo):
621  self._seqranges = simo._exclude_coords.get(component, [])
622 
623  def is_excluded(self, indexes):
624  """Return True iff the given sequence range is excluded."""
625  for seqrange in self._seqranges:
626  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
627  return True
628 
629 
630 class _Model(ihm.model.Model):
631  def __init__(self, prot, simo, protocol, assembly, representation):
632  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
633  representation=representation)
634  self.simo = weakref.proxy(simo)
635  # Transformation from IMP coordinates into mmCIF coordinate system.
636  # Normally we pass through coordinates unchanged, but in some cases
637  # we may want to translate them (e.g. Nup84, where the deposited PDB
638  # files are all centered; we want the mmCIF files to match)
640  self.em2d_stats = None
641  self.stats = None
642  # True iff restraints act on this model
643  self._is_restrained = True
644  o = self.output = IMP.pmi.output.Output(atomistic=True)
645  name = 'cif-output'
646  self.m = prot.get_model()
647  o.dictionary_pdbs[name] = prot
648  o._init_dictchain(name, prot, multichar_chain=True)
649  (particle_infos_for_pdb,
650  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
651  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
652  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
653  self.rmsf = {}
654 
655  def all_chains(self, simo):
656  """Yield all chains, including transformed ones"""
657  chain_for_comp = {}
658  for c in self.chains:
659  yield c
660  chain_for_comp[c.comp] = c
661  for tc in simo._transformed_components:
662  orig_chain = chain_for_comp.get(tc.original, None)
663  if orig_chain:
664  asym = simo.asym_units[tc.name]
665  c = _TransformedChain(orig_chain, asym, tc.transform)
666  c.comp = tc.name
667  yield c
668 
669  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
670  entity_for_chain = {}
671  comp_for_chain = {}
672  correct_asym = {}
673  for protname, chain_id in o.dictchain[name].items():
674  if protname in simo.entities:
675  entity_for_chain[chain_id] = simo.entities[protname]
676  else:
677  # Remove copy number
678  pn = protname.split('.')[0]
679  entity_for_chain[chain_id] = simo.entities[pn]
680  comp_for_chain[chain_id] = protname
681  # When doing multi-state modeling, the chain ID returned here
682  # (assigned sequentially) might not be correct (states may have
683  # gaps in the chain IDs). Map it to the correct asym unit.
684  correct_asym[chain_id] = simo.asym_units[protname]
685 
686  # Gather by chain ID (should be sorted by chain ID already)
687  self.chains = []
688  chain = None
689  excluder = None
690 
691  for (xyz, atom_type, residue_type, chain_id, residue_index,
692  all_indexes, radius) in particle_infos_for_pdb:
693  if chain is None or chain.pmi_chain_id != chain_id:
694  chain = _Chain(chain_id, correct_asym[chain_id])
695  chain.entity = entity_for_chain[chain_id]
696  chain.comp = comp_for_chain[chain_id]
697  self.chains.append(chain)
698  excluder = _Excluder(chain.comp, simo)
699  if not excluder.is_excluded(all_indexes if all_indexes
700  else [residue_index]):
701  chain.add(xyz, atom_type, residue_type, residue_index,
702  all_indexes, radius)
703 
704  def parse_rmsf_file(self, fname, component):
705  self.rmsf[component] = rmsf = {}
706  with open(fname) as fh:
707  for line in fh:
708  resnum, blocknum, val = line.split()
709  rmsf[int(resnum)] = (int(blocknum), float(val))
710 
711  def get_rmsf(self, component, indexes):
712  """Get the RMSF value for the given residue indexes."""
713  if not self.rmsf:
714  return None
715  rmsf = self.rmsf[component]
716  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
717  if len(blocknums) != 1:
718  raise ValueError("Residue indexes %s aren't all in the same block"
719  % str(indexes))
720  return rmsf[indexes[0]][1]
721 
722  def get_atoms(self):
723  for chain in self.all_chains(self.simo):
724  pmi_offset = chain.asym_unit.entity.pmi_offset
725  for atom in chain.atoms:
726  (xyz, atom_type, residue_type, residue_index,
727  all_indexes, radius) = atom
728  pt = self.transform * xyz
729  yield ihm.model.Atom(asym_unit=chain.asym_unit,
730  seq_id=residue_index - pmi_offset,
731  atom_id=atom_type.get_string(),
732  type_symbol=None, # todo: get element
733  x=pt[0], y=pt[1], z=pt[2])
734 
735  def get_spheres(self):
736  for chain in self.all_chains(self.simo):
737  pmi_offset = chain.asym_unit.entity.pmi_offset
738  for sphere in chain.spheres:
739  (xyz, residue_type, residue_index,
740  all_indexes, radius) = sphere
741  if all_indexes is None:
742  all_indexes = (residue_index,)
743  pt = self.transform * xyz
744  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
745  seq_id_range=(all_indexes[0] - pmi_offset,
746  all_indexes[-1] - pmi_offset),
747  x=pt[0], y=pt[1], z=pt[2], radius=radius,
748  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
749 
750 
751 class _AllProtocols(object):
752  def __init__(self, simo):
753  self.simo = weakref.proxy(simo)
754  # Lists of Protocols by state
755  self.protocols = OrderedDict()
756 
757  def add_protocol(self, state):
758  """Add a new Protocol"""
759  if state not in self.protocols:
760  self.protocols[state] = []
761  p = ihm.protocol.Protocol()
762  self.simo.system.orphan_protocols.append(p)
763  self.protocols[state].append(p)
764 
765  def add_step(self, step, state):
766  """Add a ProtocolStep to the last Protocol of the given State"""
767  if state not in self.protocols:
768  self.add_protocol(state)
769  protocol = self.get_last_protocol(state)
770  if len(protocol.steps) == 0:
771  step.num_models_begin = 0
772  else:
773  step.num_models_begin = protocol.steps[-1].num_models_end
774  protocol.steps.append(step)
775  step.id = len(protocol.steps)
776  # Assume that protocol uses all currently-defined datasets
777  step.dataset_group = self.simo.all_datasets.get_all_group(state)
778 
779  def add_postproc(self, step, state):
780  """Add a postprocessing step to the last protocol"""
781  protocol = self.get_last_protocol(state)
782  if len(protocol.analyses) == 0:
783  protocol.analyses.append(ihm.analysis.Analysis())
784  protocol.analyses[-1].steps.append(step)
785 
786  def get_last_protocol(self, state):
787  """Return the most recently-added _Protocol"""
788  return self.protocols[state][-1]
789 
790 
791 class _AllStartingModels(object):
792  def __init__(self, simo):
793  self.simo = simo
794  # dict of starting models (entire PDB files), collected from fragments,
795  # ordered by component name and state
796  self.models = OrderedDict()
797  self.output = IMP.pmi.output.Output()
798 
799  def add_pdb_fragment(self, fragment):
800  """Add a starting model PDB fragment."""
801  comp = fragment.component
802  state = fragment.state
803  if comp not in self.models:
804  self.models[comp] = OrderedDict()
805  if state not in self.models[comp]:
806  self.models[comp][state] = []
807  models = self.models[comp][state]
808  if len(models) == 0 \
809  or models[-1].fragments[0].pdbname != fragment.pdbname:
810  model = self._add_model(fragment)
811  models.append(model)
812  else:
813  # Break circular ref between fragment and model
814  models[-1].fragments.append(weakref.proxy(fragment))
815  # Update residue range to cover all fragments
816  pmi_offset = models[-1].asym_unit.entity.pmi_offset
817  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
818  models[-1].asym_unit.seq_id_range[0])
819  sid_end = max(fragment.end + fragment.offset - pmi_offset,
820  models[-1].asym_unit.seq_id_range[1])
821  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
822  fragment.starting_model = models[-1]
823 
824  def _add_model(self, f):
825  parser = ihm.metadata.PDBParser()
826  r = parser.parse_file(f.pdbname)
827 
828  self.simo._add_dataset(r['dataset'])
829  # We only want the templates that model the starting model chain
830  templates = r['templates'].get(f.chain, [])
831  for t in templates:
832  if t.alignment_file:
833  self.simo.system.locations.append(t.alignment_file)
834  if t.dataset:
835  self.simo._add_dataset(t.dataset)
836  source = r['entity_source'].get(f.chain)
837  if source:
838  f.asym_unit.entity.source = source
839  pmi_offset = f.asym_unit.entity.pmi_offset
840  m = _StartingModel(
841  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
842  dataset=r['dataset'], asym_id=f.chain,
843  templates=templates, offset=f.offset + pmi_offset,
844  metadata=r['metadata'],
845  software=r['software'][0] if r['software'] else None,
846  script_file=r['script'])
847  m.fragments = [weakref.proxy(f)]
848  return m
849 
850 
851 class _StartingModel(ihm.startmodel.StartingModel):
852  def get_seq_dif(self):
853  return self._seq_dif
854 
855  def get_atoms(self):
856  pmi_offset = self.asym_unit.entity.pmi_offset
857  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
858  self.asym_unit)
859  for f in self.fragments:
860  sel = IMP.atom.Selection(f.starting_hier,
861  residue_indexes=list(range(f.start - f.offset,
862  f.end - f.offset + 1)))
863  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
864  f.offset - pmi_offset):
865  yield a
866  self._seq_dif = mh._seq_dif
867 
868 
869 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
870  """Post processing using AnalysisReplicaExchange0 macro"""
871 
872  def __init__(self, rex, num_models_begin):
873  self.rex = rex
874  num_models_end = 0
875  for fname in self.get_all_stat_files():
876  with open(fname) as fh:
877  num_models_end += len(fh.readlines())
878  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
879  feature='RMSD', num_models_begin=num_models_begin,
880  num_models_end=num_models_end)
881 
882  def get_stat_file(self, cluster_num):
883  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
884  'stat.out')
885 
886  def get_all_stat_files(self):
887  for i in range(self.rex._number_of_clusters):
888  yield self.get_stat_file(i)
889 
890 
891 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
892  """Ensemble generated using AnalysisReplicaExchange0 macro"""
893 
894  num_models_deposited = None # Override base class property
895 
896  def __init__(self, pp, cluster_num, model_group, num_deposit):
897  with open(pp.get_stat_file(cluster_num)) as fh:
898  num_models = len(fh.readlines())
899  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
900  num_models=num_models,
901  model_group=model_group, post_process=pp,
902  clustering_feature=pp.feature,
903  name=model_group.name)
904  self.cluster_num = cluster_num
905  self.num_models_deposited = num_deposit
906 
907  def get_rmsf_file(self, component):
908  return os.path.join(self.post_process.rex._outputdir,
909  'cluster.%d' % self.cluster_num,
910  'rmsf.%s.dat' % component)
911 
912  def load_rmsf(self, model, component):
913  fname = self.get_rmsf_file(component)
914  if os.path.exists(fname):
915  model.parse_rmsf_file(fname, component)
916 
917  def get_localization_density_file(self, fname):
918  return os.path.join(self.post_process.rex._outputdir,
919  'cluster.%d' % self.cluster_num,
920  '%s.mrc' % fname)
921 
922  def load_localization_density(self, state, fname, select_tuple, asym_units):
923  fullpath = self.get_localization_density_file(fname)
924  if os.path.exists(fullpath):
925  details = "Localization density for %s %s" \
926  % (fname, self.model_group.name)
927  local_file = ihm.location.OutputFileLocation(fullpath,
928  details=details)
929  for s in select_tuple:
930  if isinstance(s, tuple) and len(s) == 3:
931  asym = asym_units[s[2]].pmi_range(s[0], s[1])
932  else:
933  asym = asym_units[s]
934  den = ihm.model.LocalizationDensity(file=local_file,
935  asym_unit=asym)
936  self.densities.append(den)
937 
938  def load_all_models(self, simo, state):
939  stat_fname = self.post_process.get_stat_file(self.cluster_num)
940  model_num = 0
941  with open(stat_fname) as fh:
942  stats = ast.literal_eval(fh.readline())
943  # Correct path
944  rmf_file = os.path.join(os.path.dirname(stat_fname),
945  "%d.rmf3" % model_num)
946  for c in state.all_modeled_components:
947  # todo: this only works with PMI 1
948  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
949  force_rigid_update=True)
950  # todo: fill in other data from stat file, e.g. crosslink phi/psi
951  yield stats
952  model_num += 1
953  if model_num >= self.num_models_deposited:
954  return
955 
956  # todo: also support dRMS precision
957  def _get_precision(self):
958  precfile = os.path.join(self.post_process.rex._outputdir,
959  "precision.%d.%d.out" % (self.cluster_num,
960  self.cluster_num))
961  if not os.path.exists(precfile):
962  return ihm.unknown
963  # Fail if the precision.x.x.out file doesn't match the cluster
964  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
965  % self.cluster_num)
966  with open(precfile) as fh:
967  for line in fh:
968  m = r.match(line)
969  if m:
970  return float(m.group(1))
971 
972  precision = property(lambda self: self._get_precision(),
973  lambda self, val: None)
974 
975 class _SimpleEnsemble(ihm.model.Ensemble):
976  """Simple manually-created ensemble"""
977 
978  num_models_deposited = None # Override base class property
979 
980  def __init__(self, pp, model_group, num_models, drmsd,
981  num_models_deposited, ensemble_file):
982  super(_SimpleEnsemble, self).__init__(
983  model_group=model_group, post_process=pp, num_models=num_models,
984  file=ensemble_file, precision=drmsd, name=model_group.name,
985  clustering_feature='dRMSD')
986  self.num_models_deposited = num_models_deposited
987 
988  def load_localization_density(self, state, component, asym, local_file):
989  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
990  self.densities.append(den)
991 
992 
993 class _EntityMapper(dict):
994  """Handle mapping from IMP components (without copy number) to CIF entities.
995  Multiple components may map to the same entity if they share sequence."""
996  def __init__(self, system):
997  super(_EntityMapper, self).__init__()
998  self._sequence_dict = {}
999  self._entities = []
1000  self.system = system
1001 
1002  def add(self, component_name, sequence, offset):
1003  def entity_seq(sequence):
1004  # Map X to UNK
1005  if 'X' in sequence:
1006  return ['UNK' if s == 'X' else s for s in sequence]
1007  else:
1008  return sequence
1009  if sequence not in self._sequence_dict:
1010  # Use the name of the first component, stripped of any copy number,
1011  # as the description of the entity
1012  d = component_name.split("@")[0].split(".")[0]
1013  entity = Entity(entity_seq(sequence), description=d,
1014  pmi_offset=offset)
1015  self.system.entities.append(entity)
1016  self._sequence_dict[sequence] = entity
1017  self[component_name] = self._sequence_dict[sequence]
1018 
1019 
1020 class _TransformedComponent(object):
1021  def __init__(self, name, original, transform):
1022  self.name, self.original, self.transform = name, original, transform
1023 
1024 
1025 class _SimpleRef(object):
1026  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1027  def __init__(self, ref):
1028  self.ref = ref
1029  def __call__(self):
1030  return self.ref
1031 
1032 
1033 class _State(ihm.model.State):
1034  """Representation of a single state in the system."""
1035 
1036  def __init__(self, pmi_object, po):
1037  # Point to the PMI object for this state. Use a weak reference
1038  # since the state object typically points to us too, so we need
1039  # to break the reference cycle. In PMI1 this will be a
1040  # Representation object; in PMI2 it is the PMI2 State object itself.
1041  self._pmi_object = weakref.proxy(pmi_object)
1042  if hasattr(pmi_object, 'state'):
1043  # Need a strong ref to pmi_object.state to prevent it from being
1044  # cleaned up when doing PMI1 multistate modeling
1045  self._pmi_state = _SimpleRef(pmi_object.state)
1046  else:
1047  self._pmi_state = weakref.ref(pmi_object)
1048  # Preserve PMI state name
1049  old_name = self.name
1050  super(_State, self).__init__(experiment_type='Fraction of bulk')
1051  self.name = old_name
1052 
1053  # The assembly of all components modeled by IMP in this state.
1054  # This may be smaller than the complete assembly.
1055  self.modeled_assembly = ihm.Assembly(
1056  name="Modeled assembly",
1057  description=self.get_postfixed_name(
1058  "All components modeled by IMP"))
1059  po.system.orphan_assemblies.append(self.modeled_assembly)
1060 
1061  self.all_modeled_components = []
1062 
1063  def __hash__(self):
1064  return hash(self._pmi_state())
1065  def __eq__(self, other):
1066  return self._pmi_state() == other._pmi_state()
1067 
1068  def add_model_group(self, group):
1069  self.append(group)
1070  return group
1071 
1072  def get_prefixed_name(self, name):
1073  """Prefix the given name with the state name, if available."""
1074  if self.short_name:
1075  return self.short_name + ' ' + name
1076  else:
1077  # Can't use capitalize() since that un-capitalizes everything
1078  # but the first letter
1079  return name[0].upper() + name[1:] if name else ''
1080 
1081  def get_postfixed_name(self, name):
1082  """Postfix the given name with the state name, if available."""
1083  if self.short_name:
1084  return "%s in state %s" % (name, self.short_name)
1085  else:
1086  return name
1087 
1088  short_name = property(lambda self: self._pmi_state().short_name)
1089  long_name = property(lambda self: self._pmi_state().long_name)
1090  def __get_name(self):
1091  return self._pmi_state().long_name
1092  def __set_name(self, val):
1093  self._pmi_state().long_name = val
1094  name = property(__get_name, __set_name)
1095 
1096 
1097 class Entity(ihm.Entity):
1098  """A single entity in the system.
1099  This functions identically to the base ihm.Entity class, but it
1100  allows identifying residues by either the IHM numbering scheme
1101  (seq_id, which is always contiguous starting at 1) or by the PMI
1102  scheme (which is similar, but may not start at 1 if the sequence in
1103  the FASTA file has one or more N-terminal gaps"""
1104  def __init__(self, sequence, pmi_offset, *args, **keys):
1105  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1106  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1107  self.pmi_offset = pmi_offset
1108  super(Entity, self).__init__(sequence, *args, **keys)
1109 
1110  def pmi_residue(self, res_id):
1111  """Return a single IHM residue indexed using PMI numbering"""
1112  return self.residue(res_id - self.pmi_offset)
1113 
1114  def pmi_range(self, res_id_begin, res_id_end):
1115  """Return a range of IHM residues indexed using PMI numbering"""
1116  off = self.pmi_offset
1117  return self(res_id_begin - off, res_id_end - off)
1118 
1119 
1120 class AsymUnit(ihm.AsymUnit):
1121  """A single asymmetric unit in the system.
1122  This functions identically to the base ihm.AsymUnit class, but it
1123  allows identifying residues by either the IHM numbering scheme
1124  (seq_id, which is always contiguous starting at 1) or by the PMI
1125  scheme (which is similar, but may not start at 1 if the sequence in
1126  the FASTA file has one or more N-terminal gaps"""
1127 
1128  def __init__(self, entity, *args, **keys):
1129  super(AsymUnit, self).__init__(
1130  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1131 
1132  def pmi_residue(self, res_id):
1133  """Return a single IHM residue indexed using PMI numbering"""
1134  return self.residue(res_id - self.entity.pmi_offset)
1135 
1136  def pmi_range(self, res_id_begin, res_id_end):
1137  """Return a range of IHM residues indexed using PMI numbering"""
1138  off = self.entity.pmi_offset
1139  return self(res_id_begin - off, res_id_end - off)
1140 
1141 
1143  """Class to encode a modeling protocol as mmCIF.
1144 
1145  IMP has basic support for writing out files in mmCIF format, for
1146  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1147  After creating an instance of this class, attach it to an
1148  IMP.pmi.representation.Representation object. After this, any
1149  generated models and metadata are automatically collected and
1150  output as mmCIF.
1151  """
1152  def __init__(self, fh):
1153  # Ultimately, collect data in an ihm.System object
1154  self.system = ihm.System()
1155  self._state_group = ihm.model.StateGroup()
1156  self.system.state_groups.append(self._state_group)
1157 
1158  self.fh = fh
1159  self._state_ensemble_offset = 0
1160  self._each_metadata = [] # list of metadata for each representation
1161  self._file_datasets = []
1162  self._main_script = os.path.abspath(sys.argv[0])
1163 
1164  # Point to the main modeling script
1165  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1166  details="The main integrative modeling script")
1167  self.system.locations.append(loc)
1168 
1169  self._states = {}
1170  self.__asym_states = {}
1171  self._working_directory = os.getcwd()
1172  self.default_representation = self.create_representation(
1173  "Default representation")
1174  self.entities = _EntityMapper(self.system)
1175  # Mapping from component names to ihm.AsymUnit
1176  self.asym_units = {}
1177  self._all_components = {}
1178  self.all_modeled_components = []
1179  self._transformed_components = []
1180  self.sequence_dict = {}
1181 
1182  # Common geometric objects used in PMI systems
1183  self._xy_plane = ihm.geometry.XYPlane()
1184  self._xz_plane = ihm.geometry.XZPlane()
1185  self._z_axis = ihm.geometry.ZAxis()
1186  self._center_origin = ihm.geometry.Center(0,0,0)
1187  self._identity_transform = ihm.geometry.Transformation.identity()
1188 
1189  # Coordinates to exclude
1190  self._exclude_coords = {}
1191 
1192  self.all_representations = _AllModelRepresentations(self)
1193  self.all_protocols = _AllProtocols(self)
1194  self.all_datasets = _AllDatasets(self.system)
1195  self.all_starting_models = _AllStartingModels(self)
1196 
1197  self.all_software = _AllSoftware(self.system)
1198 
1199  def create_representation(self, name):
1200  """Create a new Representation and return it. This can be
1201  passed to add_model(), add_bead_element() or add_pdb_element()."""
1202  r = ihm.representation.Representation()
1203  self.system.orphan_representations.append(r)
1204  return r
1205 
1206  def exclude_coordinates(self, component, seqrange):
1207  """Don't record coordinates for the given domain.
1208  Coordinates for the given domain (specified by a component name
1209  and a 2-element tuple giving the start and end residue numbers)
1210  will be excluded from the mmCIF file. This can be used to exclude
1211  parts of the structure that weren't well resolved in modeling.
1212  Any bead or residue that lies wholly within this range will be
1213  excluded. Multiple ranges for a given component can be excluded
1214  by calling this method multiple times."""
1215  if component not in self._exclude_coords:
1216  self._exclude_coords[component] = []
1217  self._exclude_coords[component].append(seqrange)
1218 
1219  def _is_excluded(self, component, start, end):
1220  """Return True iff this chunk of sequence should be excluded"""
1221  for seqrange in self._exclude_coords.get(component, ()):
1222  if start >= seqrange[0] and end <= seqrange[1]:
1223  return True
1224 
1225  def _add_state(self, state):
1226  """Create a new state and return a pointer to it."""
1227  self._state_ensemble_offset = len(self.system.ensembles)
1228  s = _State(state, self)
1229  self._state_group.append(s)
1230  self._last_state = s
1231  return s
1232 
1233  def _get_chain_for_component(self, name, output):
1234  """Get the chain ID for a component, if any."""
1235  # todo: handle multiple copies
1236  if name in self.asym_units:
1237  return self.asym_units[name]._id
1238  else:
1239  # A non-modeled component doesn't have a chain ID
1240  return None
1241 
1242  def _get_assembly_comps(self, assembly):
1243  """Get the names of the components in the given assembly"""
1244  # component name = asym_unit.details
1245  comps = {}
1246  for ca in assembly:
1247  comps[ca.details] = None
1248  return comps
1249 
1250  def create_transformed_component(self, state, name, original, transform):
1251  """Make a new component that's a transformed copy of another.
1252  All representation for the existing component is copied to the
1253  new one."""
1254  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1255  if name in assembly_comps:
1256  raise ValueError("Component %s already exists" % name)
1257  elif original not in assembly_comps:
1258  raise ValueError("Original component %s does not exist" % original)
1259  self.create_component(state, name, True)
1260  self.add_component_sequence(state, name, self.sequence_dict[original])
1261  self._transformed_components.append(_TransformedComponent(
1262  name, original, transform))
1263  self.all_representations.copy_component(state, name, original,
1264  self.asym_units[name])
1265 
1266  def create_component(self, state, name, modeled, asym_name=None):
1267  if asym_name is None:
1268  asym_name = name
1269  new_comp = name not in self._all_components
1270  self._all_components[name] = None
1271  if modeled:
1272  state.all_modeled_components.append(name)
1273  if new_comp:
1274  # assign asym once we get sequence
1275  self.asym_units[asym_name] = None
1276  self.all_modeled_components.append(name)
1277 
1278  def add_component_sequence(self, state, name, seq, asym_name=None):
1279  if asym_name is None:
1280  asym_name = name
1281  def get_offset(seq):
1282  # Count length of gaps at start of sequence
1283  for i in range(len(seq)):
1284  if seq[i] != '-':
1285  return seq[i:], i
1286  seq, offset = get_offset(seq)
1287  if name in self.sequence_dict:
1288  if self.sequence_dict[name] != seq:
1289  raise ValueError("Sequence mismatch for component %s" % name)
1290  else:
1291  self.sequence_dict[name] = seq
1292  self.entities.add(name, seq, offset)
1293  if asym_name in self.asym_units:
1294  if self.asym_units[asym_name] is None:
1295  # Set up a new asymmetric unit for this component
1296  entity = self.entities[name]
1297  asym = AsymUnit(entity, details=asym_name)
1298  self.system.asym_units.append(asym)
1299  self.asym_units[asym_name] = asym
1300  state.modeled_assembly.append(self.asym_units[asym_name])
1301 
1302  def _add_restraint_model_fits(self):
1303  """Add fits to restraints for all known models"""
1304  for group, m in self.system._all_models():
1305  if m._is_restrained:
1306  for r in self.system.restraints:
1307  if hasattr(r, 'add_fits_from_model_statfile'):
1308  r.add_fits_from_model_statfile(m)
1309 
1310  def flush(self, format='mmCIF'):
1311  """Write out all information to the file.
1312  Information can be written in any format supported by
1313  the ihm library (typically this is 'mmCIF' or 'BCIF')."""
1314  self.finalize()
1315  ihm.dumper.write(self.fh, [self.system], format,
1316  dumpers=[_ReplicaExchangeProtocolDumper])
1317 
1318  def finalize(self):
1319  """Do any final processing on the class hierarchy.
1320  After calling this method, the `system` member (an instance
1321  of `ihm.System`) completely reproduces the PMI modeling, and
1322  can be written out to an mmCIF file with `ihm.dumper.write`,
1323  and/or modified using the ihm API."""
1324  self._add_restraint_model_fits()
1325 
1326  # Add metadata to ihm.System
1327  self.system.software.extend(m for m in self._metadata
1328  if isinstance(m, ihm.Software))
1329  self.system.citations.extend(m for m in self._metadata
1330  if isinstance(m, ihm.Citation))
1331  self.system.locations.extend(m for m in self._metadata
1332  if isinstance(m, ihm.location.FileLocation))
1333 
1334  # Point all locations to repos, if applicable
1335  all_repos = [m for m in self._metadata
1336  if isinstance(m, ihm.location.Repository)]
1337  self.system.update_locations_in_repositories(all_repos)
1338 
1339  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1340  chain, hier, representation=None):
1341  if self._is_excluded(name, start, end):
1342  return
1343  if representation is None:
1344  representation = self.default_representation
1345  asym = self.asym_units[name]
1346  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1347  hier, asym)
1348  self.all_representations.add_fragment(state, representation, p)
1349  self.all_starting_models.add_pdb_fragment(p)
1350 
1351  def add_bead_element(self, state, name, start, end, num, hier,
1352  representation=None):
1353  if self._is_excluded(name, start, end):
1354  return
1355  if representation is None:
1356  representation = self.default_representation
1357  asym = self.asym_units[name]
1358  pmi_offset = asym.entity.pmi_offset
1359  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1360  num, hier, asym)
1361  self.all_representations.add_fragment(state, representation, b)
1362 
1363  def get_cross_link_group(self, pmi_restraint):
1364  r = _CrossLinkRestraint(pmi_restraint)
1365  self.system.restraints.append(r)
1366  self._add_restraint_dataset(r) # so that all-dataset group works
1367  return r
1368 
1369  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1370  if c1 not in self._all_components or c2 not in self._all_components:
1371  # Crosslink refers to a component we didn't model
1372  # As a quick hack, just ignore it.
1373  # todo: need to add an entity for this anyway (so will need the
1374  # sequence, and add to struct_assembly)
1375  return None
1376  e1 = self.entities[c1]
1377  e2 = self.entities[c2]
1378  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1379  residue2=e2.pmi_residue(r2))
1380  rsr.experimental_cross_links.append([xl])
1381  return xl
1382 
1383  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1384  psi_p, rsr):
1385  # Map p1/p2 to asym units
1386  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1387  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1388 
1389  if _get_by_residue(p1) and _get_by_residue(p2):
1390  cls = _ResidueCrossLink
1391  else:
1392  cls = _FeatureCrossLink
1393  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1394  restrain_all=True)
1395  # Needed to look up fits later
1396  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1397  rsr.cross_links.append(xl)
1398 
1399  def add_replica_exchange(self, state, rex):
1400  # todo: allow for metadata to say how many replicas were used in the
1401  # actual experiment, and how many independent runs were carried out
1402  # (use these as multipliers to get the correct total number of
1403  # output models)
1404  step = _ReplicaExchangeProtocolStep(state, rex)
1405  step.software = self.all_software.pmi
1406  self.all_protocols.add_step(step, state)
1407 
1408  def _add_simple_dynamics(self, num_models_end, method):
1409  # Always assumed that we're dealing with the last state
1410  state = self._last_state
1411  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1412  method), state)
1413 
1414  def _add_protocol(self):
1415  # Always assumed that we're dealing with the last state
1416  state = self._last_state
1417  self.all_protocols.add_protocol(state)
1418 
1419  def _add_dataset(self, dataset):
1420  return self.all_datasets.add(self._last_state, dataset)
1421 
1422  def _add_restraint_dataset(self, restraint):
1423  return self.all_datasets.add_restraint(self._last_state, restraint)
1424 
1425  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1426  # Always assumed that we're dealing with the last state
1427  state = self._last_state
1428  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1429  self.all_protocols.add_postproc(pp, state)
1430  return pp
1431 
1432  def _add_no_postprocessing(self, num_models):
1433  # Always assumed that we're dealing with the last state
1434  state = self._last_state
1435  pp = ihm.analysis.EmptyStep()
1436  pp.num_models_begin = pp.num_models_end = num_models
1437  self.all_protocols.add_postproc(pp, state)
1438  return pp
1439 
1440  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1441  num_models_deposited, localization_densities,
1442  ensemble_file):
1443  """Add an ensemble generated by ad hoc methods (not using PMI).
1444  This is currently only used by the Nup84 system."""
1445  # Always assumed that we're dealing with the last state
1446  state = self._last_state
1447  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1448  state.add_model_group(group)
1449  if ensemble_file:
1450  self.system.locations.append(ensemble_file)
1451  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1452  ensemble_file)
1453  self.system.ensembles.append(e)
1454  for c in state.all_modeled_components:
1455  den = localization_densities.get(c, None)
1456  if den:
1457  e.load_localization_density(state, c, self.asym_units[c], den)
1458  return e
1459 
1460  def set_ensemble_file(self, i, location):
1461  """Point a previously-created ensemble to an 'all-models' file.
1462  This could be a trajectory such as DCD, an RMF, or a multimodel
1463  PDB file."""
1464  self.system.locations.append(location)
1465  # Ensure that we point to an ensemble related to the current state
1466  ind = i + self._state_ensemble_offset
1467  self.system.ensembles[ind].file = location
1468 
1469  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1470  # todo: add prefilter as an additional postprocess step (complication:
1471  # we don't know how many models it filtered)
1472  # todo: postpone rmsf/density localization extraction until after
1473  # relevant methods are called (currently it is done from the
1474  # constructor)
1475  protocol = self.all_protocols.get_last_protocol(state)
1476  num_models = protocol.steps[-1].num_models_end
1477  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1478  pp.software = self.all_software.pmi
1479  self.all_protocols.add_postproc(pp, state)
1480  for i in range(rex._number_of_clusters):
1481  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1482  'cluster %d' % (i + 1)))
1483  state.add_model_group(group)
1484  # todo: make # of models to deposit configurable somewhere
1485  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1486  self.system.ensembles.append(e)
1487  # Add localization density info if available
1488  for fname, stuple in sorted(density_custom_ranges.items()):
1489  e.load_localization_density(state, fname, stuple,
1490  self.asym_units)
1491  for stats in e.load_all_models(self, state):
1492  m = self.add_model(group)
1493  # Since we currently only deposit 1 model, it is the
1494  # best scoring one
1495  m.name = 'Best scoring model'
1496  m.stats = stats
1497  # Add RMSF info if available
1498  for c in state.all_modeled_components:
1499  e.load_rmsf(m, c)
1500 
1501  def _get_subassembly(self, comps, name, description):
1502  """Get an Assembly consisting of the given components.
1503  `compdict` is a dictionary of the components to add, where keys
1504  are the component names and values are the sequence ranges (or
1505  None to use all residues in the component)."""
1506  asyms = []
1507  for comp, seqrng in comps.items():
1508  a = self.asym_units[comp]
1509  asyms.append(a if seqrng is None else a(*seqrng))
1510 
1511  a = ihm.Assembly(asyms, name=name, description=description)
1512  return a
1513 
1514  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1515  details):
1516  """Add a basic FoXS fit. This is largely intended for use from the
1517  NPC application."""
1518  assembly = self._get_subassembly({comp:seqrange},
1519  name="SAXS subassembly",
1520  description="All components that fit SAXS data")
1521  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1522  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1523  multi_state=False, radius_of_gyration=rg, details=details)
1524  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1525  self.system.restraints.append(r)
1526  self._add_restraint_dataset(r) # so that all-dataset group works
1527 
1528  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1529  image_resolution, projection_number,
1530  micrographs_number):
1531  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1532  image_resolution, projection_number,
1533  micrographs_number)
1534  self.system.restraints.append(r)
1535  self._add_restraint_dataset(r) # so that all-dataset group works
1536 
1537  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1538  # todo: need to set allow_duplicates on this dataset?
1539  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1540  self.system.restraints.append(r)
1541  self._add_restraint_dataset(r) # so that all-dataset group works
1542 
1543  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1544  sigma, pmi_restraint):
1545  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1546  sigma, pmi_restraint, self._xy_plane)
1547 
1548  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1549  sigma, pmi_restraint):
1550  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1551  sigma, pmi_restraint, self._xz_plane)
1552 
1553  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1554  sigma, pmi_restraint):
1555  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1556  sigma, pmi_restraint, self._z_axis)
1557 
1558  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1559  sigma, pmi_restraint, geom):
1560  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1561  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1562  asym.get_feature(ps),
1563  ihm.restraint.LowerUpperBoundDistanceRestraint(
1564  lower_bound, upper_bound),
1565  sigma)
1566  self.system.restraints.append(r)
1567  self._add_restraint_dataset(r) # so that all-dataset group works
1568 
1569  def _get_membrane(self, tor_R, tor_r, tor_th):
1570  """Get an object representing a half-torus membrane"""
1571  if not hasattr(self, '_seen_membranes'):
1572  self._seen_membranes = {}
1573  # If a membrane already exists with all dimensions within 0.01 of
1574  # this one, reuse it
1575  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1576  if membrane_id not in self._seen_membranes:
1577  m = ihm.geometry.HalfTorus(center=self._center_origin,
1578  transformation=self._identity_transform,
1579  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1580  inner=True, name='Membrane')
1581  self._seen_membranes[membrane_id] = m
1582  return self._seen_membranes[membrane_id]
1583 
1584  def add_membrane_surface_location_restraint(
1585  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1586  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1587  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1588 
1589  def add_membrane_exclusion_restraint(
1590  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1591  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1592  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1593 
1594  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1595  sigma, pmi_restraint, rsr):
1596  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1597  r = _GeometricRestraint(self, state, pmi_restraint,
1598  self._get_membrane(tor_R, tor_r, tor_th),
1599  asym.get_feature(ps), rsr, sigma)
1600  self.system.restraints.append(r)
1601  self._add_restraint_dataset(r) # so that all-dataset group works
1602 
1603  def add_model(self, group, assembly=None, representation=None):
1604  state = self._last_state
1605  if representation is None:
1606  representation = self.default_representation
1607  protocol = self.all_protocols.get_last_protocol(state)
1608  m = _Model(state.prot, self, protocol,
1609  assembly if assembly else state.modeled_assembly,
1610  representation)
1611  group.append(m)
1612  return m
1613 
1614  def _update_locations(self, filelocs):
1615  """Update FileLocation to point to a parent repository, if any"""
1616  all_repos = [m for m in self._metadata
1617  if isinstance(m, ihm.location.Repository)]
1618  for fileloc in filelocs:
1619  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1620 
1621  _metadata = property(lambda self:
1622  itertools.chain.from_iterable(self._each_metadata))
1623 
1624 
1625 def read(fh, model_class=ihm.model.Model, format='mmCIF', handlers=[]):
1626  """Read data from the mmCIF file handle `fh`.
1627  This is a simple wrapper around `ihm.reader.read()`, which also
1628  reads PMI-specific information from the mmCIF or BinaryCIF file."""
1629  return ihm.reader.read(fh, model_class=model_class, format=format,
1630  handlers=[_ReplicaExchangeProtocolHandler]+handlers)
1631 
1632 
1633 class GMMParser(ihm.metadata.Parser):
1634  """Extract metadata from an EM density GMM file."""
1635 
1636  def parse_file(self, filename):
1637  """Extract metadata from `filename`.
1638  @return a dict with key `dataset` pointing to the GMM file and
1639  `number_of_gaussians` to the number of GMMs (or None)"""
1640  l = ihm.location.InputFileLocation(filename,
1641  details="Electron microscopy density map, "
1642  "represented as a Gaussian Mixture Model (GMM)")
1643  # A 3DEM restraint's dataset ID uniquely defines the mmCIF restraint, so
1644  # we need to allow duplicates
1645  l._allow_duplicates = True
1646  d = ihm.dataset.EMDensityDataset(l)
1647  ret = {'dataset':d, 'number_of_gaussians':None}
1648 
1649  with open(filename) as fh:
1650  for line in fh:
1651  if line.startswith('# data_fn: '):
1652  p = ihm.metadata.MRCParser()
1653  fn = line[11:].rstrip('\r\n')
1654  dataset = p.parse_file(os.path.join(
1655  os.path.dirname(filename), fn))['dataset']
1656  ret['dataset'].parents.append(dataset)
1657  elif line.startswith('# ncenters: '):
1658  ret['number_of_gaussians'] = int(line[12:])
1659  return ret
Select non water and non hydrogen atoms.
Definition: pdb.h:245
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1199
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1250
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1209
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1148
def flush
Write out all information to the file.
Definition: mmcif.py:1310
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def parse_file
Extract metadata from filename.
Definition: mmcif.py:1637
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1114
Miscellaneous utilities.
Definition: tools.py:1
Extract metadata from an EM density GMM file.
Definition: mmcif.py:1633
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:690
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1132
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1124
A single entity in the system.
Definition: mmcif.py:1097
Representation of the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1460
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1136
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1318
Base class for capturing a modeling protocol.
Definition: output.py:40
Basic utilities for handling cryo-electron microscopy 3D density maps.
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:62
3D rotation class.
Definition: Rotation3D.h:46
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:134
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1110
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def read
Read data from the mmCIF file handle fh.
Definition: mmcif.py:1625
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...