IMP logo
IMP Reference Guide  2.12.0
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi1.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping or adjoining ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rngs.append(asym(rind, rind))
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rngs.append(asym(rinds[0], rinds[-1]))
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  return ihm.restraint.ResidueFeature(rngs)
103 
104 
105 class _AllSoftware(object):
106  def __init__(self, system):
107  self.system = system
108  self.modeller_used = self.phyre2_used = False
109  self.system.software.extend([
110  ihm.Software(
111  name="Integrative Modeling Platform (IMP)",
112  version=IMP.__version__,
113  classification="integrative model building",
114  description="integrative model building",
115  location='https://integrativemodeling.org'),
116  ihm.Software(
117  name="IMP PMI module",
118  version=IMP.pmi1.__version__,
119  classification="integrative model building",
120  description="integrative model building",
121  location='https://integrativemodeling.org')])
122 
123  def set_modeller_used(self, version, date):
124  if self.modeller_used:
125  return
126  self.modeller_used = True
127  self.system.software.append(ihm.Software(
128  name='MODELLER', classification='comparative modeling',
129  description='Comparative modeling by satisfaction '
130  'of spatial restraints, build ' + date,
131  location='https://salilab.org/modeller/',
132  version=version))
133 
134  def set_phyre2_used(self):
135  if self.phyre2_used:
136  return
137  self.phyre2_used = True
138  self.system.software.append(ihm.Software(
139  name='Phyre2', classification='protein homology modeling',
140  description='Protein Homology/analogY Recognition '
141  'Engine V 2.0',
142  version='2.0',
143  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
144 
145 def _get_fragment_is_rigid(fragment):
146  """Determine whether a fragment is modeled rigidly"""
147  leaves = IMP.atom.get_leaves(fragment.hier)
148  # Assume all leaves are set up as rigid/flexible in the same way
149  return IMP.core.RigidMember.get_is_setup(leaves[0])
150 
151 class _PDBFragment(ihm.representation.ResidueSegment):
152  """Record details about part of a PDB file used as input
153  for a component."""
154  def __init__(self, state, component, start, end, pdb_offset,
155  pdbname, chain, hier, asym_unit):
156  # start, end are PMI residue indexes (not IHM)
157  super(_PDBFragment, self).__init__(
158  asym_unit=asym_unit.pmi_range(start, end),
159  rigid=None, primitive='sphere')
160  self.component, self.start, self.end, self.offset, self.pdbname \
161  = component, start, end, pdb_offset, pdbname
162  self.state, self.chain, self.hier = state, chain, hier
165  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
166 
167  rigid = property(lambda self: _get_fragment_is_rigid(self),
168  lambda self, val: None)
169 
170  def combine(self, other):
171  pass
172 
173 
174 class _BeadsFragment(ihm.representation.FeatureSegment):
175  """Record details about beads used to represent part of a component."""
176  chain = None
177  def __init__(self, state, component, start, end, count, hier, asym_unit):
178  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
179  rigid=None, primitive='sphere', count=count)
180  self.state, self.component, self.hier \
181  = state, component, hier
182 
183  rigid = property(lambda self: _get_fragment_is_rigid(self),
184  lambda self, val: None)
185 
186  def combine(self, other):
187  # todo: don't combine if one fragment is rigid and the other flexible
188  if type(other) == type(self) and \
189  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
190  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
191  other.asym_unit.seq_id_range[1])
192  self.count += other.count
193  return True
194 
195 
196 class _AllModelRepresentations(object):
197  def __init__(self, simo):
198  self.simo = simo
199  # dict of fragments, ordered by representation, then component name,
200  # then state
201  self.fragments = OrderedDict()
202  self._all_representations = {}
203 
204  def copy_component(self, state, name, original, asym_unit):
205  """Copy all representation for `original` in `state` to `name`"""
206  def copy_frag(f):
207  newf = copy.copy(f)
208  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
209  return newf
210  for rep in self.fragments:
211  if original in self.fragments[rep]:
212  if name not in self.fragments[rep]:
213  self.fragments[rep][name] = OrderedDict()
214  self.fragments[rep][name][state] = [copy_frag(f)
215  for f in self.fragments[rep][original][state]]
216  # Assume representation for a component is the same in all
217  # states, so only write out the first one
218  first_state = list(self.fragments[rep][name].keys())[0]
219  if state is first_state:
220  representation = self._all_representations[rep]
221  representation.extend(self.fragments[rep][name][state])
222 
223  def add_fragment(self, state, representation, fragment):
224  """Add a model fragment."""
225  comp = fragment.component
226  id_rep = id(representation)
227  self._all_representations[id_rep] = representation
228  if id_rep not in self.fragments:
229  self.fragments[id_rep] = OrderedDict()
230  if comp not in self.fragments[id_rep]:
231  self.fragments[id_rep][comp] = OrderedDict()
232  if state not in self.fragments[id_rep][comp]:
233  self.fragments[id_rep][comp][state] = []
234  fragments = self.fragments[id_rep][comp][state]
235  if len(fragments) == 0 or not fragments[-1].combine(fragment):
236  fragments.append(fragment)
237  # Assume representation for a component is the same in all states,
238  # so only write out the first one
239  first_state = list(self.fragments[id_rep][comp].keys())[0]
240  if state is first_state:
241  representation.append(fragment)
242 
243 
244 class _AllDatasets(object):
245  """Track all datasets generated by PMI and add them to the ihm.System"""
246  def __init__(self, system):
247  self.system = system
248  self._datasets = []
249  self._datasets_by_state = {}
250  self._restraints_by_state = {}
251 
252  def get_all_group(self, state):
253  """Get a DatasetGroup encompassing all datasets so far in this state"""
254  # Note that if a restraint dataset is replaced after this is called,
255  # the group will still contain the old dataset - mark dataset as read
256  # only?
257  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
258  + [r.dataset for r in self._restraints_by_state.get(state, [])
259  if r.dataset])
260  return g
261 
262  def add(self, state, dataset):
263  """Add a new dataset."""
264  self._datasets.append(dataset)
265  if state not in self._datasets_by_state:
266  self._datasets_by_state[state] = []
267  self._datasets_by_state[state].append(dataset)
268  # Make sure that the dataset ends up in the mmCIF file even if nothing
269  # refers to it
270  self.system.orphan_datasets.append(dataset)
271  return dataset
272 
273  def add_restraint(self, state, restraint):
274  """Add the dataset for a restraint"""
275  if state not in self._restraints_by_state:
276  self._restraints_by_state[state] = []
277  self._restraints_by_state[state].append(restraint)
278 
279 
280 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
281  """Restrain to a set of cross links"""
282 
283  assembly = None
284  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
285  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
286  'EDC': ihm.cross_linkers.edc}
287 
288  def __init__(self, pmi_restraint):
289  self.pmi_restraint = pmi_restraint
290  label = self.pmi_restraint.label
291  # Map commonly-used subtypes to more standard labels
292  label = self._label_map.get(label, label)
293  self.label = label
294  super(_CrossLinkRestraint, self).__init__(
295  dataset=self.pmi_restraint.dataset,
296  linker=self._get_chem_descriptor(label))
297 
298  @classmethod
299  def _get_chem_descriptor(cls, label):
300  if label not in cls._descriptor_map:
301  # If label is not a standard linker type, make a new chemical
302  # descriptor containing just the name. We don't know the chemistry
303  # so cannot specify a SMILES or INCHI string for it at this point
304  d = ihm.ChemDescriptor(label)
305  cls._descriptor_map[label] = d
306  return cls._descriptor_map[label]
307 
308  def _set_psi_sigma(self, model):
309  old_values = []
310  # Do nothing if given a different state than the restraint applies to
311  if model.m != self.pmi_restraint.m:
312  return
313  for resolution in self.pmi_restraint.sigma_dictionary:
314  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
315  if model.stats and statname in model.stats:
316  sigma = float(model.stats[statname])
317  p = self.pmi_restraint.sigma_dictionary[resolution][0]
318  old_values.append((p, p.get_scale()))
319  p.set_scale(sigma)
320  for psiindex in self.pmi_restraint.psi_dictionary:
321  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
322  if model.stats and statname in model.stats:
323  psi = float(model.stats[statname])
324  p = self.pmi_restraint.psi_dictionary[psiindex][0]
325  old_values.append((p, p.get_scale()))
326  p.set_scale(psi)
327  # If the same particle is set multiple times, make sure we get the
328  # original value last
329  return list(reversed(old_values))
330 
331  def add_fits_from_model_statfile(self, model):
332  # Set psi/sigma for all particles
333  old_values = self._set_psi_sigma(model)
334  # If no stats were collected, we can't show the fit
335  if not old_values:
336  return
337  for xl in self.cross_links:
338  # Get current psi/sigma particle value for each XL
339  xl.fits[model] = ihm.restraint.CrossLinkFit(
340  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
341  # Restore original psi/sigma
342  for p, scale in old_values:
343  p.set_scale(scale)
344 
345  # Have our dataset point to that in the original PMI restraint
346  def __set_dataset(self, val):
347  self.pmi_restraint.dataset = val
348  dataset = property(lambda self: self.pmi_restraint.dataset,
349  __set_dataset)
350 
351 
352 def get_asym_mapper_for_state(simo, state, asym_map):
353  asym = asym_map.get(state, None)
354  if asym is None:
355  asym = _AsymMapper(simo, state.prot)
356  asym_map[state] = asym
357  return asym
358 
359 class _PMICrossLink(object):
360  # Query PMI particles to get psi and sigma values at time of use, not
361  # time of construction (since they may be set after creating the restraint)
362  psi = property(lambda self: self.psi_p.get_scale(),
363  lambda self, val: None)
364  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
365  lambda self, val: None)
366  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
367  lambda self, val: None)
368 
369 
370 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
371  pass
372 
373 
374 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
375  pass
376 
377 
378 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
379  def __init__(self, state, pmi_restraint, image_number, resolution,
380  pixel_size, image_resolution, projection_number,
381  micrographs_number):
382  self.pmi_restraint, self.image_number = pmi_restraint, image_number
383  super(_EM2DRestraint, self).__init__(
384  dataset=pmi_restraint.datasets[image_number],
385  assembly=state.modeled_assembly,
386  segment=False, number_raw_micrographs=micrographs_number,
387  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
388  image_resolution=image_resolution,
389  number_of_projections=projection_number)
390 
391  # Have our dataset point to that in the original PMI restraint
392  def __get_dataset(self):
393  return self.pmi_restraint.datasets[self.image_number]
394  def __set_dataset(self, val):
395  self.pmi_restraint.datasets[self.image_number] = val
396  dataset = property(__get_dataset, __set_dataset)
397 
398  def add_fits_from_model_statfile(self, model):
399  ccc = self._get_cross_correlation(model)
400  transform = self._get_transformation(model)
401  rot = transform.get_rotation()
402  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
403  self.fits[model] = ihm.restraint.EM2DRestraintFit(
404  cross_correlation_coefficient=ccc,
405  rot_matrix=rm,
406  tr_vector=transform.get_translation())
407 
408  def _get_transformation(self, model):
409  """Get the transformation that places the model on the image"""
410  stats = model.em2d_stats or model.stats
411  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
412  self.image_number + 1)
413  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
414  t = [float(stats[prefix + '_Translation%d' % i])
415  for i in range(3)]
416  # If the model coordinates are transformed, need to back transform
417  # them first
418  inv = model.transform.get_inverse()
420  IMP.algebra.Vector3D(*t)) * inv
421 
422  def _get_cross_correlation(self, model):
423  """Get the cross correlation coefficient between the model projection
424  and the image"""
425  stats = model.em2d_stats or model.stats
426  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
427  % (self.pmi_restraint.label,
428  self.image_number + 1)])
429 
430 
431 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
432 
433  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
434  self.pmi_restraint = pmi_restraint
435  super(_EM3DRestraint, self).__init__(
436  dataset=pmi_restraint.dataset,
437  assembly=self._get_assembly(densities, simo, state),
438  fitting_method='Gaussian mixture models',
439  number_of_gaussians=len(target_ps))
440 
441  # Have our dataset point to that in the original PMI restraint
442  def __set_dataset(self, val):
443  self.pmi_restraint.dataset = val
444  dataset = property(lambda self: self.pmi_restraint.dataset,
445  __set_dataset)
446 
447  def _get_assembly(self, densities, simo, state):
448  """Get the Assembly that this restraint acts on"""
449  cm = _ComponentMapper(state.prot)
450  components = {}
451  for d in densities:
452  components[cm[d]] = None # None == all residues in this component
453  a = simo._get_subassembly(components,
454  name="EM subassembly",
455  description="All components that fit the EM map")
456  return a
457 
458  def add_fits_from_model_statfile(self, model):
459  ccc = self._get_cross_correlation(model)
460  self.fits[model] = ihm.restraint.EM3DRestraintFit(
461  cross_correlation_coefficient=ccc)
462 
463  def _get_cross_correlation(self, model):
464  """Get the cross correlation coefficient between the model
465  and the map"""
466  if model.stats is not None:
467  return float(model.stats['GaussianEMRestraint_%s_CCC'
468  % self.pmi_restraint.label])
469 
470 
471 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
472 
473  def __init__(self, simo, state, pmi_restraint, geometric_object,
474  feature, distance, sigma):
475  self.pmi_restraint = pmi_restraint
476  super(_GeometricRestraint, self).__init__(
477  dataset=pmi_restraint.dataset,
478  geometric_object=geometric_object, feature=feature,
479  distance=distance, harmonic_force_constant = 1. / sigma,
480  restrain_all=True)
481 
482  # Have our dataset point to that in the original PMI restraint
483  def __set_dataset(self, val):
484  self.pmi_restraint.dataset = val
485  dataset = property(lambda self: self.pmi_restraint.dataset,
486  __set_dataset)
487 
488 
489 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
490  def __init__(self, state, rex):
491  if rex.monte_carlo_sample_objects is not None:
492  method = 'Replica exchange monte carlo'
493  else:
494  method = 'Replica exchange molecular dynamics'
495  super(_ReplicaExchangeProtocolStep, self).__init__(
496  assembly=state.modeled_assembly,
497  dataset_group=None, # filled in by add_step()
498  method=method, name='Sampling',
499  num_models_begin=None, # filled in by add_step()
500  num_models_end=rex.vars["number_of_frames"],
501  multi_scale=True, multi_state=False, ordered=False)
502 
503 
504 class _SimpleProtocolStep(ihm.protocol.Step):
505  def __init__(self, state, num_models_end, method):
506  super(_SimpleProtocolStep, self).__init__(
507  assembly=state.modeled_assembly,
508  dataset_group=None, # filled in by add_step()
509  method=method, name='Sampling',
510  num_models_begin=None, # filled in by add_step()
511  num_models_end=num_models_end,
512  multi_scale=True, multi_state=False, ordered=False)
513 
514 
515 class _Chain(object):
516  """Represent a single chain in a Model"""
517  def __init__(self, pmi_chain_id, asym_unit):
518  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
519  self.spheres = []
520  self.atoms = []
521 
522  def add(self, xyz, atom_type, residue_type, residue_index,
523  all_indexes, radius):
524  if atom_type is None:
525  self.spheres.append((xyz, residue_type, residue_index,
526  all_indexes, radius))
527  else:
528  self.atoms.append((xyz, atom_type, residue_type, residue_index,
529  all_indexes, radius))
530  orig_comp = property(lambda self: self.comp)
531 
532 class _TransformedChain(object):
533  """Represent a chain that is a transformed version of another"""
534  def __init__(self, orig_chain, asym_unit, transform):
535  self.orig_chain, self.asym_unit = orig_chain, asym_unit
536  self.transform = transform
537 
538  def __get_spheres(self):
539  for (xyz, residue_type, residue_index, all_indexes,
540  radius) in self.orig_chain.spheres:
541  yield (self.transform * xyz, residue_type, residue_index,
542  all_indexes, radius)
543  spheres = property(__get_spheres)
544 
545  def __get_atoms(self):
546  for (xyz, atom_type, residue_type, residue_index, all_indexes,
547  radius) in self.orig_chain.atoms:
548  yield (self.transform * xyz, atom_type, residue_type, residue_index,
549  all_indexes, radius)
550  atoms = property(__get_atoms)
551 
552  entity = property(lambda self: self.orig_chain.entity)
553  orig_comp = property(lambda self: self.orig_chain.comp)
554 
555 class _Excluder(object):
556  def __init__(self, component, simo):
557  self._seqranges = simo._exclude_coords.get(component, [])
558 
559  def is_excluded(self, indexes):
560  """Return True iff the given sequence range is excluded."""
561  for seqrange in self._seqranges:
562  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
563  return True
564 
565 
566 class _Model(ihm.model.Model):
567  def __init__(self, prot, simo, protocol, assembly, representation):
568  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
569  representation=representation)
570  self.simo = weakref.proxy(simo)
571  # Transformation from IMP coordinates into mmCIF coordinate system.
572  # Normally we pass through coordinates unchanged, but in some cases
573  # we may want to translate them (e.g. Nup84, where the deposited PDB
574  # files are all centered; we want the mmCIF files to match)
576  self.em2d_stats = None
577  self.stats = None
578  # True iff restraints act on this model
579  self._is_restrained = True
580  o = self.output = IMP.pmi1.output.Output(atomistic=True)
581  name = 'cif-output'
582  self.m = prot.get_model()
583  o.dictionary_pdbs[name] = prot
584  o._init_dictchain(name, prot, multichar_chain=True)
585  (particle_infos_for_pdb,
586  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
587  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
588  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
589  self.rmsf = {}
590 
591  def all_chains(self, simo):
592  """Yield all chains, including transformed ones"""
593  chain_for_comp = {}
594  for c in self.chains:
595  yield c
596  chain_for_comp[c.comp] = c
597  for tc in simo._transformed_components:
598  orig_chain = chain_for_comp.get(tc.original, None)
599  if orig_chain:
600  asym = simo.asym_units[tc.name]
601  c = _TransformedChain(orig_chain, asym, tc.transform)
602  c.comp = tc.name
603  yield c
604 
605  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
606  entity_for_chain = {}
607  comp_for_chain = {}
608  correct_asym = {}
609  for protname, chain_id in o.dictchain[name].items():
610  entity_for_chain[chain_id] = simo.entities[protname]
611  comp_for_chain[chain_id] = protname
612  # When doing multi-state modeling, the chain ID returned here
613  # (assigned sequentially) might not be correct (states may have
614  # gaps in the chain IDs). Map it to the correct asym unit.
615  correct_asym[chain_id] = simo.asym_units[protname]
616 
617  # Gather by chain ID (should be sorted by chain ID already)
618  self.chains = []
619  chain = None
620  excluder = None
621 
622  for (xyz, atom_type, residue_type, chain_id, residue_index,
623  all_indexes, radius) in particle_infos_for_pdb:
624  if chain is None or chain.pmi_chain_id != chain_id:
625  chain = _Chain(chain_id, correct_asym[chain_id])
626  chain.entity = entity_for_chain[chain_id]
627  chain.comp = comp_for_chain[chain_id]
628  self.chains.append(chain)
629  excluder = _Excluder(chain.comp, simo)
630  if not excluder.is_excluded(all_indexes if all_indexes
631  else [residue_index]):
632  chain.add(xyz, atom_type, residue_type, residue_index,
633  all_indexes, radius)
634 
635  def parse_rmsf_file(self, fname, component):
636  self.rmsf[component] = rmsf = {}
637  with open(fname) as fh:
638  for line in fh:
639  resnum, blocknum, val = line.split()
640  rmsf[int(resnum)] = (int(blocknum), float(val))
641 
642  def get_rmsf(self, component, indexes):
643  """Get the RMSF value for the given residue indexes."""
644  if not self.rmsf:
645  return None
646  rmsf = self.rmsf[component]
647  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
648  if len(blocknums) != 1:
649  raise ValueError("Residue indexes %s aren't all in the same block"
650  % str(indexes))
651  return rmsf[indexes[0]][1]
652 
653  def get_atoms(self):
654  for chain in self.all_chains(self.simo):
655  pmi_offset = chain.asym_unit.entity.pmi_offset
656  for atom in chain.atoms:
657  (xyz, atom_type, residue_type, residue_index,
658  all_indexes, radius) = atom
659  pt = self.transform * xyz
660  yield ihm.model.Atom(asym_unit=chain.asym_unit,
661  seq_id=residue_index - pmi_offset,
662  atom_id=atom_type.get_string(),
663  type_symbol=None, # todo: get element
664  x=pt[0], y=pt[1], z=pt[2])
665 
666  def get_spheres(self):
667  for chain in self.all_chains(self.simo):
668  pmi_offset = chain.asym_unit.entity.pmi_offset
669  for sphere in chain.spheres:
670  (xyz, residue_type, residue_index,
671  all_indexes, radius) = sphere
672  if all_indexes is None:
673  all_indexes = (residue_index,)
674  pt = self.transform * xyz
675  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
676  seq_id_range=(all_indexes[0] - pmi_offset,
677  all_indexes[-1] - pmi_offset),
678  x=pt[0], y=pt[1], z=pt[2], radius=radius,
679  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
680 
681 
682 class _AllProtocols(object):
683  def __init__(self, simo):
684  self.simo = weakref.proxy(simo)
685  # Lists of Protocols by state
686  self.protocols = OrderedDict()
687 
688  def add_protocol(self, state):
689  """Add a new Protocol"""
690  if state not in self.protocols:
691  self.protocols[state] = []
692  p = ihm.protocol.Protocol()
693  self.simo.system.orphan_protocols.append(p)
694  self.protocols[state].append(p)
695 
696  def add_step(self, step, state):
697  """Add a ProtocolStep to the last Protocol of the given State"""
698  if state not in self.protocols:
699  self.add_protocol(state)
700  protocol = self.get_last_protocol(state)
701  if len(protocol.steps) == 0:
702  step.num_models_begin = 0
703  else:
704  step.num_models_begin = protocol.steps[-1].num_models_end
705  protocol.steps.append(step)
706  step.id = len(protocol.steps)
707  # Assume that protocol uses all currently-defined datasets
708  step.dataset_group = self.simo.all_datasets.get_all_group(state)
709 
710  def add_postproc(self, step, state):
711  """Add a postprocessing step to the last protocol"""
712  protocol = self.get_last_protocol(state)
713  if len(protocol.analyses) == 0:
714  protocol.analyses.append(ihm.analysis.Analysis())
715  protocol.analyses[-1].steps.append(step)
716 
717  def get_last_protocol(self, state):
718  """Return the most recently-added _Protocol"""
719  return self.protocols[state][-1]
720 
721 
722 class _AllStartingModels(object):
723  def __init__(self, simo):
724  self.simo = simo
725  # dict of starting models (entire PDB files), collected from fragments,
726  # ordered by component name and state
727  self.models = OrderedDict()
728  self.output = IMP.pmi1.output.Output()
729 
730  def add_pdb_fragment(self, fragment):
731  """Add a starting model PDB fragment."""
732  comp = fragment.component
733  state = fragment.state
734  if comp not in self.models:
735  self.models[comp] = OrderedDict()
736  if state not in self.models[comp]:
737  self.models[comp][state] = []
738  models = self.models[comp][state]
739  if len(models) == 0 \
740  or models[-1].fragments[0].pdbname != fragment.pdbname:
741  model = self._add_model(fragment)
742  models.append(model)
743  else:
744  # Break circular ref between fragment and model
745  models[-1].fragments.append(weakref.proxy(fragment))
746  # Update residue range to cover all fragments
747  pmi_offset = models[-1].asym_unit.entity.pmi_offset
748  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
749  models[-1].asym_unit.seq_id_range[0])
750  sid_end = max(fragment.end + fragment.offset - pmi_offset,
751  models[-1].asym_unit.seq_id_range[1])
752  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
753  fragment.starting_model = models[-1]
754 
755  def _add_model(self, f):
756  parser = ihm.metadata.PDBParser()
757  r = parser.parse_file(f.pdbname)
758 
759  self.simo._add_dataset(r['dataset'])
760  # We only want the templates that model the starting model chain
761  templates = r['templates'].get(f.chain, [])
762  for t in templates:
763  if t.alignment_file:
764  self.simo.system.locations.append(t.alignment_file)
765  if t.dataset:
766  self.simo._add_dataset(t.dataset)
767  pmi_offset = f.asym_unit.entity.pmi_offset
768  m = _StartingModel(
769  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
770  dataset=r['dataset'], asym_id=f.chain,
771  templates=templates, offset=f.offset + pmi_offset,
772  metadata=r['metadata'],
773  software=r['software'][0] if r['software'] else None,
774  script_file=r['script'])
775  m.fragments = [weakref.proxy(f)]
776  return m
777 
778 
779 class _StartingModel(ihm.startmodel.StartingModel):
780  def get_seq_dif(self):
781  return self._seq_dif
782 
783  def get_atoms(self):
784  pmi_offset = self.asym_unit.entity.pmi_offset
785  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
786  self.asym_unit)
787  for f in self.fragments:
788  sel = IMP.atom.Selection(f.starting_hier,
789  residue_indexes=list(range(f.start - f.offset,
790  f.end - f.offset + 1)))
791  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
792  f.offset - pmi_offset):
793  yield a
794  self._seq_dif = mh._seq_dif
795 
796 
797 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
798  """Post processing using AnalysisReplicaExchange0 macro"""
799 
800  def __init__(self, rex, num_models_begin):
801  self.rex = rex
802  num_models_end = 0
803  for fname in self.get_all_stat_files():
804  with open(fname) as fh:
805  num_models_end += len(fh.readlines())
806  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
807  feature='RMSD', num_models_begin=num_models_begin,
808  num_models_end=num_models_end)
809 
810  def get_stat_file(self, cluster_num):
811  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
812  'stat.out')
813 
814  def get_all_stat_files(self):
815  for i in range(self.rex._number_of_clusters):
816  yield self.get_stat_file(i)
817 
818 
819 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
820  """Ensemble generated using AnalysisReplicaExchange0 macro"""
821 
822  num_models_deposited = None # Override base class property
823 
824  def __init__(self, pp, cluster_num, model_group, num_deposit):
825  with open(pp.get_stat_file(cluster_num)) as fh:
826  num_models = len(fh.readlines())
827  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
828  num_models=num_models,
829  model_group=model_group, post_process=pp,
830  clustering_feature=pp.feature,
831  name=model_group.name)
832  self.cluster_num = cluster_num
833  self.num_models_deposited = num_deposit
834 
835  def get_rmsf_file(self, component):
836  return os.path.join(self.post_process.rex._outputdir,
837  'cluster.%d' % self.cluster_num,
838  'rmsf.%s.dat' % component)
839 
840  def load_rmsf(self, model, component):
841  fname = self.get_rmsf_file(component)
842  if os.path.exists(fname):
843  model.parse_rmsf_file(fname, component)
844 
845  def get_localization_density_file(self, fname):
846  return os.path.join(self.post_process.rex._outputdir,
847  'cluster.%d' % self.cluster_num,
848  '%s.mrc' % fname)
849 
850  def load_localization_density(self, state, fname, select_tuple, asym_units):
851  fullpath = self.get_localization_density_file(fname)
852  if os.path.exists(fullpath):
853  details = "Localization density for %s %s" \
854  % (fname, self.model_group.name)
855  local_file = ihm.location.OutputFileLocation(fullpath,
856  details=details)
857  for s in select_tuple:
858  if isinstance(s, tuple) and len(s) == 3:
859  asym = asym_units[s[2]].pmi_range(s[0], s[1])
860  else:
861  asym = asym_units[s]
862  den = ihm.model.LocalizationDensity(file=local_file,
863  asym_unit=asym)
864  self.densities.append(den)
865 
866  def load_all_models(self, simo, state):
867  stat_fname = self.post_process.get_stat_file(self.cluster_num)
868  model_num = 0
869  with open(stat_fname) as fh:
870  stats = ast.literal_eval(fh.readline())
871  # Correct path
872  rmf_file = os.path.join(os.path.dirname(stat_fname),
873  "%d.rmf3" % model_num)
874  for c in state.all_modeled_components:
875  # todo: this only works with PMI 1
876  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
877  force_rigid_update=True)
878  # todo: fill in other data from stat file, e.g. crosslink phi/psi
879  yield stats
880  model_num += 1
881  if model_num >= self.num_models_deposited:
882  return
883 
884  # todo: also support dRMS precision
885  def _get_precision(self):
886  precfile = os.path.join(self.post_process.rex._outputdir,
887  "precision.%d.%d.out" % (self.cluster_num,
888  self.cluster_num))
889  if not os.path.exists(precfile):
890  return ihm.unknown
891  # Fail if the precision.x.x.out file doesn't match the cluster
892  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
893  % self.cluster_num)
894  with open(precfile) as fh:
895  for line in fh:
896  m = r.match(line)
897  if m:
898  return float(m.group(1))
899 
900  precision = property(lambda self: self._get_precision(),
901  lambda self, val: None)
902 
903 class _SimpleEnsemble(ihm.model.Ensemble):
904  """Simple manually-created ensemble"""
905 
906  num_models_deposited = None # Override base class property
907 
908  def __init__(self, pp, model_group, num_models, drmsd,
909  num_models_deposited, ensemble_file):
910  super(_SimpleEnsemble, self).__init__(
911  model_group=model_group, post_process=pp, num_models=num_models,
912  file=ensemble_file, precision=drmsd, name=model_group.name,
913  clustering_feature='dRMSD')
914  self.num_models_deposited = num_models_deposited
915 
916  def load_localization_density(self, state, component, asym, local_file):
917  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
918  self.densities.append(den)
919 
920 
921 class _EntityMapper(dict):
922  """Handle mapping from IMP components to CIF entities.
923  Multiple components may map to the same entity if they share sequence."""
924  def __init__(self, system):
925  super(_EntityMapper, self).__init__()
926  self._sequence_dict = {}
927  self._entities = []
928  self.system = system
929 
930  def add(self, component_name, sequence, offset):
931  def entity_seq(sequence):
932  # Map X to UNK
933  if 'X' in sequence:
934  return ['UNK' if s == 'X' else s for s in sequence]
935  else:
936  return sequence
937  if sequence not in self._sequence_dict:
938  # Use the name of the first component, stripped of any copy number,
939  # as the description of the entity
940  d = component_name.split("@")[0].split(".")[0]
941  entity = Entity(entity_seq(sequence), description=d,
942  pmi_offset=offset)
943  self.system.entities.append(entity)
944  self._sequence_dict[sequence] = entity
945  self[component_name] = self._sequence_dict[sequence]
946 
947 
948 class _TransformedComponent(object):
949  def __init__(self, name, original, transform):
950  self.name, self.original, self.transform = name, original, transform
951 
952 
953 class _SimpleRef(object):
954  """Class with similar interface to weakref.ref, but keeps a strong ref"""
955  def __init__(self, ref):
956  self.ref = ref
957  def __call__(self):
958  return self.ref
959 
960 
961 class _State(ihm.model.State):
962  """Representation of a single state in the system."""
963 
964  def __init__(self, pmi_object, po):
965  # Point to the PMI object for this state. Use a weak reference
966  # since the state object typically points to us too, so we need
967  # to break the reference cycle. In PMI1 this will be a
968  # Representation object; in PMI2 it is the PMI2 State object itself.
969  self._pmi_object = weakref.proxy(pmi_object)
970  if hasattr(pmi_object, 'state'):
971  # Need a strong ref to pmi_object.state to prevent it from being
972  # cleaned up when doing PMI1 multistate modeling
973  self._pmi_state = _SimpleRef(pmi_object.state)
974  else:
975  self._pmi_state = weakref.ref(pmi_object)
976  # Preserve PMI state name
977  old_name = self.name
978  super(_State, self).__init__(experiment_type='Fraction of bulk')
979  self.name = old_name
980 
981  # The assembly of all components modeled by IMP in this state.
982  # This may be smaller than the complete assembly.
983  self.modeled_assembly = ihm.Assembly(
984  name="Modeled assembly",
985  description=self.get_postfixed_name(
986  "All components modeled by IMP"))
987  po.system.orphan_assemblies.append(self.modeled_assembly)
988 
989  self.all_modeled_components = []
990 
991  def __hash__(self):
992  return hash(self._pmi_state())
993  def __eq__(self, other):
994  return self._pmi_state() == other._pmi_state()
995 
996  def add_model_group(self, group):
997  self.append(group)
998  return group
999 
1000  def get_prefixed_name(self, name):
1001  """Prefix the given name with the state name, if available."""
1002  if self.short_name:
1003  return self.short_name + ' ' + name
1004  else:
1005  # Can't use capitalize() since that un-capitalizes everything
1006  # but the first letter
1007  return name[0].upper() + name[1:] if name else ''
1008 
1009  def get_postfixed_name(self, name):
1010  """Postfix the given name with the state name, if available."""
1011  if self.short_name:
1012  return "%s in state %s" % (name, self.short_name)
1013  else:
1014  return name
1015 
1016  short_name = property(lambda self: self._pmi_state().short_name)
1017  long_name = property(lambda self: self._pmi_state().long_name)
1018  def __get_name(self):
1019  return self._pmi_state().long_name
1020  def __set_name(self, val):
1021  self._pmi_state().long_name = val
1022  name = property(__get_name, __set_name)
1023 
1024 
1025 class Entity(ihm.Entity):
1026  """A single entity in the system.
1027  This functions identically to the base ihm.Entity class, but it
1028  allows identifying residues by either the IHM numbering scheme
1029  (seq_id, which is always contiguous starting at 1) or by the PMI
1030  scheme (which is similar, but may not start at 1 if the sequence in
1031  the FASTA file has one or more N-terminal gaps"""
1032  def __init__(self, sequence, pmi_offset, *args, **keys):
1033  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1034  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1035  self.pmi_offset = pmi_offset
1036  super(Entity, self).__init__(sequence, *args, **keys)
1037 
1038  def pmi_residue(self, res_id):
1039  """Return a single IHM residue indexed using PMI numbering"""
1040  return self.residue(res_id - self.pmi_offset)
1041 
1042  def pmi_range(self, res_id_begin, res_id_end):
1043  """Return a range of IHM residues indexed using PMI numbering"""
1044  off = self.pmi_offset
1045  return self(res_id_begin - off, res_id_end - off)
1046 
1047 
1048 class AsymUnit(ihm.AsymUnit):
1049  """A single asymmetric unit in the system.
1050  This functions identically to the base ihm.AsymUnit class, but it
1051  allows identifying residues by either the IHM numbering scheme
1052  (seq_id, which is always contiguous starting at 1) or by the PMI
1053  scheme (which is similar, but may not start at 1 if the sequence in
1054  the FASTA file has one or more N-terminal gaps"""
1055 
1056  def __init__(self, entity, *args, **keys):
1057  super(AsymUnit, self).__init__(
1058  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1059 
1060  def pmi_residue(self, res_id):
1061  """Return a single IHM residue indexed using PMI numbering"""
1062  return self.residue(res_id - self.entity.pmi_offset)
1063 
1064  def pmi_range(self, res_id_begin, res_id_end):
1065  """Return a range of IHM residues indexed using PMI numbering"""
1066  off = self.entity.pmi_offset
1067  return self(res_id_begin - off, res_id_end - off)
1068 
1069 
1071  """Class to encode a modeling protocol as mmCIF.
1072 
1073  IMP has basic support for writing out files in mmCIF format, for
1074  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1075  After creating an instance of this class, attach it to an
1076  IMP.pmi1.representation.Representation object. After this, any
1077  generated models and metadata are automatically collected and
1078  output as mmCIF.
1079  """
1080  def __init__(self, fh):
1081  # Ultimately, collect data in an ihm.System object
1082  self.system = ihm.System()
1083  self._state_group = ihm.model.StateGroup()
1084  self.system.state_groups.append(self._state_group)
1085 
1086  self.fh = fh
1087  self._state_ensemble_offset = 0
1088  self._each_metadata = [] # list of metadata for each representation
1089  self._file_datasets = []
1090  self._main_script = os.path.abspath(sys.argv[0])
1091 
1092  # Point to the main modeling script
1093  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1094  details="The main integrative modeling script")
1095  self.system.locations.append(loc)
1096 
1097  self._states = {}
1098  self.__asym_states = {}
1099  self._working_directory = os.getcwd()
1100  self.default_representation = self.create_representation(
1101  "Default representation")
1102  self.entities = _EntityMapper(self.system)
1103  # Mapping from component names to ihm.AsymUnit
1104  self.asym_units = {}
1105  self._all_components = {}
1106  self.all_modeled_components = []
1107  self._transformed_components = []
1108  self.sequence_dict = {}
1109 
1110  # Common geometric objects used in PMI systems
1111  self._xy_plane = ihm.geometry.XYPlane()
1112  self._xz_plane = ihm.geometry.XZPlane()
1113  self._z_axis = ihm.geometry.ZAxis()
1114  self._center_origin = ihm.geometry.Center(0,0,0)
1115  self._identity_transform = ihm.geometry.Transformation.identity()
1116 
1117  # Coordinates to exclude
1118  self._exclude_coords = {}
1119 
1120  self.all_representations = _AllModelRepresentations(self)
1121  self.all_protocols = _AllProtocols(self)
1122  self.all_datasets = _AllDatasets(self.system)
1123  self.all_starting_models = _AllStartingModels(self)
1124 
1125  self.all_software = _AllSoftware(self.system)
1126 
1127  def create_representation(self, name):
1128  """Create a new Representation and return it. This can be
1129  passed to add_model(), add_bead_element() or add_pdb_element()."""
1130  r = ihm.representation.Representation()
1131  self.system.orphan_representations.append(r)
1132  return r
1133 
1134  def exclude_coordinates(self, component, seqrange):
1135  """Don't record coordinates for the given domain.
1136  Coordinates for the given domain (specified by a component name
1137  and a 2-element tuple giving the start and end residue numbers)
1138  will be excluded from the mmCIF file. This can be used to exclude
1139  parts of the structure that weren't well resolved in modeling.
1140  Any bead or residue that lies wholly within this range will be
1141  excluded. Multiple ranges for a given component can be excluded
1142  by calling this method multiple times."""
1143  if component not in self._exclude_coords:
1144  self._exclude_coords[component] = []
1145  self._exclude_coords[component].append(seqrange)
1146 
1147  def _is_excluded(self, component, start, end):
1148  """Return True iff this chunk of sequence should be excluded"""
1149  for seqrange in self._exclude_coords.get(component, ()):
1150  if start >= seqrange[0] and end <= seqrange[1]:
1151  return True
1152 
1153  def _add_state(self, state):
1154  """Create a new state and return a pointer to it."""
1155  self._state_ensemble_offset = len(self.system.ensembles)
1156  s = _State(state, self)
1157  self._state_group.append(s)
1158  self._last_state = s
1159  return s
1160 
1161  def get_file_dataset(self, fname):
1162  for d in self._file_datasets:
1163  fd = d.get(os.path.abspath(fname), None)
1164  if fd:
1165  return fd
1166 
1167  def _get_chain_for_component(self, name, output):
1168  """Get the chain ID for a component, if any."""
1169  # todo: handle multiple copies
1170  if name in self.asym_units:
1171  return self.asym_units[name]._id
1172  else:
1173  # A non-modeled component doesn't have a chain ID
1174  return None
1175 
1176  def _get_assembly_comps(self, assembly):
1177  """Get the names of the components in the given assembly"""
1178  # component name = asym_unit.details
1179  comps = {}
1180  for ca in assembly:
1181  comps[ca.details] = None
1182  return comps
1183 
1184  def create_transformed_component(self, state, name, original, transform):
1185  """Make a new component that's a transformed copy of another.
1186  All representation for the existing component is copied to the
1187  new one."""
1188  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1189  if name in assembly_comps:
1190  raise ValueError("Component %s already exists" % name)
1191  elif original not in assembly_comps:
1192  raise ValueError("Original component %s does not exist" % original)
1193  self.create_component(state, name, True)
1194  self.add_component_sequence(state, name, self.sequence_dict[original])
1195  self._transformed_components.append(_TransformedComponent(
1196  name, original, transform))
1197  self.all_representations.copy_component(state, name, original,
1198  self.asym_units[name])
1199 
1200  def create_component(self, state, name, modeled):
1201  new_comp = name not in self._all_components
1202  self._all_components[name] = None
1203  if modeled:
1204  state.all_modeled_components.append(name)
1205  if new_comp:
1206  self.asym_units[name] = None # assign asym once we get sequence
1207  self.all_modeled_components.append(name)
1208 
1209  def add_component_sequence(self, state, name, seq):
1210  def get_offset(seq):
1211  # Count length of gaps at start of sequence
1212  for i in range(len(seq)):
1213  if seq[i] != '-':
1214  return seq[i:], i
1215  seq, offset = get_offset(seq)
1216  if name in self.sequence_dict:
1217  if self.sequence_dict[name] != seq:
1218  raise ValueError("Sequence mismatch for component %s" % name)
1219  else:
1220  self.sequence_dict[name] = seq
1221  self.entities.add(name, seq, offset)
1222  if name in self.asym_units:
1223  if self.asym_units[name] is None:
1224  # Set up a new asymmetric unit for this component
1225  entity = self.entities[name]
1226  asym = AsymUnit(entity, details=name)
1227  self.system.asym_units.append(asym)
1228  self.asym_units[name] = asym
1229  state.modeled_assembly.append(self.asym_units[name])
1230 
1231  def _add_restraint_model_fits(self):
1232  """Add fits to restraints for all known models"""
1233  for group, m in self.system._all_models():
1234  if m._is_restrained:
1235  for r in self.system.restraints:
1236  if hasattr(r, 'add_fits_from_model_statfile'):
1237  r.add_fits_from_model_statfile(m)
1238 
1239  def _add_em2d_raw_micrographs(self):
1240  """If the deprecated metadata.EMMicrographsDataset class was used,
1241  transfer its data to the EM2D restraint."""
1242  for r in self.system.restraints:
1243  if isinstance(r, _EM2DRestraint):
1244  for d in r.dataset.parents:
1245  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1246  r.number_raw_micrographs = d.number
1247  if isinstance(d.location,
1248  IMP.pmi1.metadata.FileLocation):
1249  d.location.content_type = \
1250  ihm.location.InputFileLocation.content_type
1251 
1252  def flush(self):
1253  self.finalize()
1254  ihm.dumper.write(self.fh, [self.system])
1255 
1256  def finalize(self):
1257  """Do any final processing on the class hierarchy.
1258  After calling this method, the `system` member (an instance
1259  of `ihm.System`) completely reproduces the PMI modeling, and
1260  can be written out to an mmCIF file with `ihm.dumper.write`,
1261  and/or modified using the ihm API."""
1262  self._add_restraint_model_fits()
1263 
1264  self._add_em2d_raw_micrographs()
1265 
1266  # Add metadata to ihm.System
1267  self.system.software.extend(m for m in self._metadata
1268  if isinstance(m, ihm.Software))
1269  self.system.citations.extend(m for m in self._metadata
1270  if isinstance(m, ihm.Citation))
1271  self.system.locations.extend(m for m in self._metadata
1272  if isinstance(m, ihm.location.FileLocation))
1273 
1274  # Point all locations to repos, if applicable
1275  all_repos = [m for m in self._metadata
1276  if isinstance(m, ihm.location.Repository)]
1277  self.system.update_locations_in_repositories(all_repos)
1278 
1279  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1280  chain, hier, representation=None):
1281  if self._is_excluded(name, start, end):
1282  return
1283  if representation is None:
1284  representation = self.default_representation
1285  asym = self.asym_units[name]
1286  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1287  hier, asym)
1288  self.all_representations.add_fragment(state, representation, p)
1289  self.all_starting_models.add_pdb_fragment(p)
1290 
1291  def add_bead_element(self, state, name, start, end, num, hier,
1292  representation=None):
1293  if self._is_excluded(name, start, end):
1294  return
1295  if representation is None:
1296  representation = self.default_representation
1297  asym = self.asym_units[name]
1298  pmi_offset = asym.entity.pmi_offset
1299  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1300  num, hier, asym)
1301  self.all_representations.add_fragment(state, representation, b)
1302 
1303  def get_cross_link_group(self, pmi_restraint):
1304  r = _CrossLinkRestraint(pmi_restraint)
1305  self.system.restraints.append(r)
1306  self._add_restraint_dataset(r) # so that all-dataset group works
1307  return r
1308 
1309  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1310  if c1 not in self._all_components or c2 not in self._all_components:
1311  # Crosslink refers to a component we didn't model
1312  # As a quick hack, just ignore it.
1313  # todo: need to add an entity for this anyway (so will need the
1314  # sequence, and add to struct_assembly)
1315  return None
1316  e1 = self.entities[c1]
1317  e2 = self.entities[c2]
1318  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1319  residue2=e2.pmi_residue(r2))
1320  rsr.experimental_cross_links.append([xl])
1321  return xl
1322 
1323  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1324  psi_p, rsr):
1325  # Map p1/p2 to asym units
1326  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1327  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1328 
1329  if _get_by_residue(p1) and _get_by_residue(p2):
1330  cls = _ResidueCrossLink
1331  else:
1332  cls = _FeatureCrossLink
1333  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1334  restrain_all=True)
1335  # Needed to look up fits later
1336  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1337  rsr.cross_links.append(xl)
1338 
1339  def add_replica_exchange(self, state, rex):
1340  # todo: allow for metadata to say how many replicas were used in the
1341  # actual experiment, and how many independent runs were carried out
1342  # (use these as multipliers to get the correct total number of
1343  # output models)
1344  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1345  state)
1346 
1347  def _add_simple_dynamics(self, num_models_end, method):
1348  # Always assumed that we're dealing with the last state
1349  state = self._last_state
1350  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1351  method), state)
1352 
1353  def _add_protocol(self):
1354  # Always assumed that we're dealing with the last state
1355  state = self._last_state
1356  self.all_protocols.add_protocol(state)
1357 
1358  def _add_dataset(self, dataset):
1359  return self.all_datasets.add(self._last_state, dataset)
1360 
1361  def _add_restraint_dataset(self, restraint):
1362  return self.all_datasets.add_restraint(self._last_state, restraint)
1363 
1364  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1365  # Always assumed that we're dealing with the last state
1366  state = self._last_state
1367  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1368  self.all_protocols.add_postproc(pp, state)
1369  return pp
1370 
1371  def _add_no_postprocessing(self, num_models):
1372  # Always assumed that we're dealing with the last state
1373  state = self._last_state
1374  pp = ihm.analysis.EmptyStep()
1375  pp.num_models_begin = pp.num_models_end = num_models
1376  self.all_protocols.add_postproc(pp, state)
1377  return pp
1378 
1379  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1380  num_models_deposited, localization_densities,
1381  ensemble_file):
1382  """Add an ensemble generated by ad hoc methods (not using PMI).
1383  This is currently only used by the Nup84 system."""
1384  # Always assumed that we're dealing with the last state
1385  state = self._last_state
1386  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1387  state.add_model_group(group)
1388  if ensemble_file:
1389  # Deprecated Location class does not state input vs output
1390  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1391  ensemble_file.content_type = \
1392  ihm.location.OutputFileLocation.content_type
1393  self.system.locations.append(ensemble_file)
1394  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1395  ensemble_file)
1396  self.system.ensembles.append(e)
1397  for c in state.all_modeled_components:
1398  den = localization_densities.get(c, None)
1399  if den:
1400  # Deprecated Location class does not state input vs output
1401  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1402  den.content_type = \
1403  ihm.location.OutputFileLocation.content_type
1404  e.load_localization_density(state, c, self.asym_units[c], den)
1405  return e
1406 
1407  def set_ensemble_file(self, i, location):
1408  """Point a previously-created ensemble to an 'all-models' file.
1409  This could be a trajectory such as DCD, an RMF, or a multimodel
1410  PDB file."""
1411  # Deprecated Location class does not state input vs output
1412  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1413  location.content_type = ihm.location.OutputFileLocation.content_type
1414  self.system.locations.append(location)
1415  # Ensure that we point to an ensemble related to the current state
1416  ind = i + self._state_ensemble_offset
1417  self.system.ensembles[ind].file = location
1418 
1419  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1420  # todo: add prefilter as an additional postprocess step (complication:
1421  # we don't know how many models it filtered)
1422  # todo: postpone rmsf/density localization extraction until after
1423  # relevant methods are called (currently it is done from the
1424  # constructor)
1425  protocol = self.all_protocols.get_last_protocol(state)
1426  num_models = protocol.steps[-1].num_models_end
1427  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1428  self.all_protocols.add_postproc(pp, state)
1429  for i in range(rex._number_of_clusters):
1430  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1431  'cluster %d' % (i + 1)))
1432  state.add_model_group(group)
1433  # todo: make # of models to deposit configurable somewhere
1434  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1435  self.system.ensembles.append(e)
1436  # Add localization density info if available
1437  for fname, stuple in sorted(density_custom_ranges.items()):
1438  e.load_localization_density(state, fname, stuple,
1439  self.asym_units)
1440  for stats in e.load_all_models(self, state):
1441  m = self.add_model(group)
1442  # Since we currently only deposit 1 model, it is the
1443  # best scoring one
1444  m.name = 'Best scoring model'
1445  m.stats = stats
1446  # Add RMSF info if available
1447  for c in state.all_modeled_components:
1448  e.load_rmsf(m, c)
1449 
1450  def _get_subassembly(self, comps, name, description):
1451  """Get an Assembly consisting of the given components.
1452  `compdict` is a dictionary of the components to add, where keys
1453  are the component names and values are the sequence ranges (or
1454  None to use all residues in the component)."""
1455  asyms = []
1456  for comp, seqrng in comps.items():
1457  a = self.asym_units[comp]
1458  asyms.append(a if seqrng is None else a(*seqrng))
1459 
1460  a = ihm.Assembly(asyms, name=name, description=description)
1461  return a
1462 
1463  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1464  details):
1465  """Add a basic FoXS fit. This is largely intended for use from the
1466  NPC application."""
1467  assembly = self._get_subassembly({comp:seqrange},
1468  name="SAXS subassembly",
1469  description="All components that fit SAXS data")
1470  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1471  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1472  multi_state=False, radius_of_gyration=rg, details=details)
1473  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1474  self.system.restraints.append(r)
1475  self._add_restraint_dataset(r) # so that all-dataset group works
1476 
1477  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1478  image_resolution, projection_number,
1479  micrographs_number):
1480  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1481  image_resolution, projection_number,
1482  micrographs_number)
1483  self.system.restraints.append(r)
1484  self._add_restraint_dataset(r) # so that all-dataset group works
1485 
1486  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1487  # todo: need to set allow_duplicates on this dataset?
1488  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1489  self.system.restraints.append(r)
1490  self._add_restraint_dataset(r) # so that all-dataset group works
1491 
1492  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1493  sigma, pmi_restraint):
1494  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1495  sigma, pmi_restraint, self._xy_plane)
1496 
1497  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1498  sigma, pmi_restraint):
1499  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1500  sigma, pmi_restraint, self._xz_plane)
1501 
1502  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1503  sigma, pmi_restraint):
1504  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1505  sigma, pmi_restraint, self._z_axis)
1506 
1507  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1508  sigma, pmi_restraint, geom):
1509  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1510  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1511  asym.get_feature(ps),
1512  ihm.restraint.LowerUpperBoundDistanceRestraint(
1513  lower_bound, upper_bound),
1514  sigma)
1515  self.system.restraints.append(r)
1516  self._add_restraint_dataset(r) # so that all-dataset group works
1517 
1518  def _get_membrane(self, tor_R, tor_r, tor_th):
1519  """Get an object representing a half-torus membrane"""
1520  if not hasattr(self, '_seen_membranes'):
1521  self._seen_membranes = {}
1522  # If a membrane already exists with all dimensions within 0.01 of
1523  # this one, reuse it
1524  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1525  if membrane_id not in self._seen_membranes:
1526  m = ihm.geometry.HalfTorus(center=self._center_origin,
1527  transformation=self._identity_transform,
1528  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1529  inner=True, name='Membrane')
1530  self._seen_membranes[membrane_id] = m
1531  return self._seen_membranes[membrane_id]
1532 
1533  def add_membrane_surface_location_restraint(
1534  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1535  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1536  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1537 
1538  def add_membrane_exclusion_restraint(
1539  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1540  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1541  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1542 
1543  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1544  sigma, pmi_restraint, rsr):
1545  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1546  r = _GeometricRestraint(self, state, pmi_restraint,
1547  self._get_membrane(tor_R, tor_r, tor_th),
1548  asym.get_feature(ps), rsr, sigma)
1549  self.system.restraints.append(r)
1550  self._add_restraint_dataset(r) # so that all-dataset group works
1551 
1552  def add_model(self, group, assembly=None, representation=None):
1553  state = self._last_state
1554  if representation is None:
1555  representation = self.default_representation
1556  protocol = self.all_protocols.get_last_protocol(state)
1557  m = _Model(state.prot, self, protocol,
1558  assembly if assembly else state.modeled_assembly,
1559  representation)
1560  group.append(m)
1561  return m
1562 
1563  def _update_locations(self, filelocs):
1564  """Update FileLocation to point to a parent repository, if any"""
1565  all_repos = [m for m in self._metadata
1566  if isinstance(m, ihm.location.Repository)]
1567  for fileloc in filelocs:
1568  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1569 
1570  _metadata = property(lambda self:
1571  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1127
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1256
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1076
A single asymmetric unit in the system.
Definition: /mmcif.py:1052
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1137
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1038
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:731
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1407
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1042
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1060
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1184
3D rotation class.
Definition: Rotation3D.h:51
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:135
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1025
VectorD< 3 > Vector3D
Definition: VectorD.h:421
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1064
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...