IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi1.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping or adjoining ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rngs.append(asym(rind, rind))
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rngs.append(asym(rinds[0], rinds[-1]))
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  return ihm.restraint.ResidueFeature(rngs)
103 
104 
105 class _AllSoftware(object):
106  def __init__(self, system):
107  self.system = system
108  self.modeller_used = self.phyre2_used = False
109  imp = ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org')
115  pmi = ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi1.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')
121  self.system.software.extend([imp, pmi])
122  # Only recent versions of python-ihm support adding citations for
123  # software
124  if hasattr(imp, 'citation'):
125  if sys.version_info[0] > 2:
126  # Don't include UTF8 characters in source; it confuses Python 2
127  javi = 'Vel\u00e1zquez-Muriel J'
128  else:
129  javi = 'Velazquez-Muriel J'
130  imp.citation = ihm.Citation(
131  pmid='22272186',
132  title='Putting the pieces together: integrative modeling '
133  'platform software for structure determination of '
134  'macromolecular assemblies',
135  journal='PLoS Biol', volume=10, page_range='e1001244',
136  year=2012,
137  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
138  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
139  doi='10.1371/journal.pbio.1001244')
140  pmi.citation = ihm.Citation(
141  pmid='31396911',
142  title='Modeling Biological Complexes Using Integrative '
143  'Modeling Platform.',
144  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
145  year=2019,
146  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
147  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
148  'Sali A'],
149  doi='10.1007/978-1-4939-9608-7_15')
150 
151  def set_modeller_used(self, version, date):
152  if self.modeller_used:
153  return
154  self.modeller_used = True
155  s = ihm.Software(
156  name='MODELLER', classification='comparative modeling',
157  description='Comparative modeling by satisfaction '
158  'of spatial restraints, build ' + date,
159  location='https://salilab.org/modeller/', version=version)
160  self.system.software.append(s)
161  if hasattr(s, 'citation'):
162  s.citation = ihm.Citation(
163  pmid='8254673',
164  title='Comparative protein modelling by satisfaction of '
165  'spatial restraints.',
166  journal='J Mol Biol', volume=234, page_range=(779, 815),
167  year=1993, authors=['Sali A', 'Blundell TL'],
168  doi='10.1006/jmbi.1993.1626')
169 
170  def set_phyre2_used(self):
171  if self.phyre2_used:
172  return
173  self.phyre2_used = True
174  s = ihm.Software(
175  name='Phyre2', classification='protein homology modeling',
176  description='Protein Homology/analogY Recognition Engine V 2.0',
177  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
178  if hasattr(s, 'citation'):
179  s.citation = ihm.Citation(
180  pmid='25950237',
181  title='The Phyre2 web portal for protein modeling, '
182  'prediction and analysis.',
183  journal='Nat Protoc', volume=10, page_range=(845, 858),
184  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
185  'Sternberg MJ'],
186  year=2015, doi='10.1038/nprot.2015.053')
187  self.system.software.append(s)
188 
189 def _get_fragment_is_rigid(fragment):
190  """Determine whether a fragment is modeled rigidly"""
191  leaves = IMP.atom.get_leaves(fragment.hier)
192  # Assume all leaves are set up as rigid/flexible in the same way
193  return IMP.core.RigidMember.get_is_setup(leaves[0])
194 
195 class _PDBFragment(ihm.representation.ResidueSegment):
196  """Record details about part of a PDB file used as input
197  for a component."""
198  def __init__(self, state, component, start, end, pdb_offset,
199  pdbname, chain, hier, asym_unit):
200  # start, end are PMI residue indexes (not IHM)
201  super(_PDBFragment, self).__init__(
202  asym_unit=asym_unit.pmi_range(start, end),
203  rigid=None, primitive='sphere')
204  self.component, self.start, self.end, self.offset, self.pdbname \
205  = component, start, end, pdb_offset, pdbname
206  self.state, self.chain, self.hier = state, chain, hier
208  & IMP.atom.ChainPDBSelector([chain])
209  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
210 
211  rigid = property(lambda self: _get_fragment_is_rigid(self),
212  lambda self, val: None)
213 
214  def combine(self, other):
215  pass
216 
217 
218 class _BeadsFragment(ihm.representation.FeatureSegment):
219  """Record details about beads used to represent part of a component."""
220  chain = None
221  def __init__(self, state, component, start, end, count, hier, asym_unit):
222  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
223  rigid=None, primitive='sphere', count=count)
224  self.state, self.component, self.hier \
225  = state, component, hier
226 
227  rigid = property(lambda self: _get_fragment_is_rigid(self),
228  lambda self, val: None)
229 
230  def combine(self, other):
231  # todo: don't combine if one fragment is rigid and the other flexible
232  if type(other) == type(self) and \
233  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
234  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
235  other.asym_unit.seq_id_range[1])
236  self.count += other.count
237  return True
238 
239 
240 class _AllModelRepresentations(object):
241  def __init__(self, simo):
242  self.simo = simo
243  # dict of fragments, ordered by representation, then component name,
244  # then state
245  self.fragments = OrderedDict()
246  self._all_representations = {}
247 
248  def copy_component(self, state, name, original, asym_unit):
249  """Copy all representation for `original` in `state` to `name`"""
250  def copy_frag(f):
251  newf = copy.copy(f)
252  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
253  return newf
254  for rep in self.fragments:
255  if original in self.fragments[rep]:
256  if name not in self.fragments[rep]:
257  self.fragments[rep][name] = OrderedDict()
258  self.fragments[rep][name][state] = [copy_frag(f)
259  for f in self.fragments[rep][original][state]]
260  # Assume representation for a component is the same in all
261  # states, so only write out the first one
262  first_state = list(self.fragments[rep][name].keys())[0]
263  if state is first_state:
264  representation = self._all_representations[rep]
265  representation.extend(self.fragments[rep][name][state])
266 
267  def add_fragment(self, state, representation, fragment):
268  """Add a model fragment."""
269  comp = fragment.component
270  id_rep = id(representation)
271  self._all_representations[id_rep] = representation
272  if id_rep not in self.fragments:
273  self.fragments[id_rep] = OrderedDict()
274  if comp not in self.fragments[id_rep]:
275  self.fragments[id_rep][comp] = OrderedDict()
276  if state not in self.fragments[id_rep][comp]:
277  self.fragments[id_rep][comp][state] = []
278  fragments = self.fragments[id_rep][comp][state]
279  if len(fragments) == 0 or not fragments[-1].combine(fragment):
280  fragments.append(fragment)
281  # Assume representation for a component is the same in all states,
282  # so only write out the first one
283  first_state = list(self.fragments[id_rep][comp].keys())[0]
284  if state is first_state:
285  representation.append(fragment)
286 
287 
288 class _AllDatasets(object):
289  """Track all datasets generated by PMI and add them to the ihm.System"""
290  def __init__(self, system):
291  self.system = system
292  self._datasets = []
293  self._datasets_by_state = {}
294  self._restraints_by_state = {}
295 
296  def get_all_group(self, state):
297  """Get a DatasetGroup encompassing all datasets so far in this state"""
298  # Note that if a restraint dataset is replaced after this is called,
299  # the group will still contain the old dataset - mark dataset as read
300  # only?
301  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
302  + [r.dataset for r in self._restraints_by_state.get(state, [])
303  if r.dataset])
304  return g
305 
306  def add(self, state, dataset):
307  """Add a new dataset."""
308  self._datasets.append(dataset)
309  if state not in self._datasets_by_state:
310  self._datasets_by_state[state] = []
311  self._datasets_by_state[state].append(dataset)
312  # Make sure that the dataset ends up in the mmCIF file even if nothing
313  # refers to it
314  self.system.orphan_datasets.append(dataset)
315  return dataset
316 
317  def add_restraint(self, state, restraint):
318  """Add the dataset for a restraint"""
319  if state not in self._restraints_by_state:
320  self._restraints_by_state[state] = []
321  self._restraints_by_state[state].append(restraint)
322 
323 
324 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
325  """Restrain to a set of cross links"""
326 
327  assembly = None
328  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
329  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
330  'EDC': ihm.cross_linkers.edc}
331 
332  def __init__(self, pmi_restraint):
333  self.pmi_restraint = pmi_restraint
334  label = self.pmi_restraint.label
335  # Map commonly-used subtypes to more standard labels
336  label = self._label_map.get(label, label)
337  self.label = label
338  super(_CrossLinkRestraint, self).__init__(
339  dataset=self.pmi_restraint.dataset,
340  linker=self._get_chem_descriptor(label))
341 
342  @classmethod
343  def _get_chem_descriptor(cls, label):
344  if label not in cls._descriptor_map:
345  # If label is not a standard linker type, make a new chemical
346  # descriptor containing just the name. We don't know the chemistry
347  # so cannot specify a SMILES or INCHI string for it at this point
348  d = ihm.ChemDescriptor(label)
349  cls._descriptor_map[label] = d
350  return cls._descriptor_map[label]
351 
352  def _set_psi_sigma(self, model):
353  old_values = []
354  # Do nothing if given a different state than the restraint applies to
355  if model.m != self.pmi_restraint.m:
356  return
357  for resolution in self.pmi_restraint.sigma_dictionary:
358  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
359  if model.stats and statname in model.stats:
360  sigma = float(model.stats[statname])
361  p = self.pmi_restraint.sigma_dictionary[resolution][0]
362  old_values.append((p, p.get_scale()))
363  p.set_scale(sigma)
364  for psiindex in self.pmi_restraint.psi_dictionary:
365  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
366  if model.stats and statname in model.stats:
367  psi = float(model.stats[statname])
368  p = self.pmi_restraint.psi_dictionary[psiindex][0]
369  old_values.append((p, p.get_scale()))
370  p.set_scale(psi)
371  # If the same particle is set multiple times, make sure we get the
372  # original value last
373  return list(reversed(old_values))
374 
375  def add_fits_from_model_statfile(self, model):
376  # Set psi/sigma for all particles
377  old_values = self._set_psi_sigma(model)
378  # If no stats were collected, we can't show the fit
379  if not old_values:
380  return
381  for xl in self.cross_links:
382  # Get current psi/sigma particle value for each XL
383  xl.fits[model] = ihm.restraint.CrossLinkFit(
384  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
385  # Restore original psi/sigma
386  for p, scale in old_values:
387  p.set_scale(scale)
388 
389  # Have our dataset point to that in the original PMI restraint
390  def __set_dataset(self, val):
391  self.pmi_restraint.dataset = val
392  dataset = property(lambda self: self.pmi_restraint.dataset,
393  __set_dataset)
394 
395 
396 def get_asym_mapper_for_state(simo, state, asym_map):
397  asym = asym_map.get(state, None)
398  if asym is None:
399  asym = _AsymMapper(simo, state.prot)
400  asym_map[state] = asym
401  return asym
402 
403 class _PMICrossLink(object):
404  # Query PMI particles to get psi and sigma values at time of use, not
405  # time of construction (since they may be set after creating the restraint)
406  psi = property(lambda self: self.psi_p.get_scale(),
407  lambda self, val: None)
408  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
409  lambda self, val: None)
410  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
411  lambda self, val: None)
412 
413 
414 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
415  pass
416 
417 
418 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
419  pass
420 
421 
422 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
423  def __init__(self, state, pmi_restraint, image_number, resolution,
424  pixel_size, image_resolution, projection_number,
425  micrographs_number):
426  self.pmi_restraint, self.image_number = pmi_restraint, image_number
427  super(_EM2DRestraint, self).__init__(
428  dataset=pmi_restraint.datasets[image_number],
429  assembly=state.modeled_assembly,
430  segment=False, number_raw_micrographs=micrographs_number,
431  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
432  image_resolution=image_resolution,
433  number_of_projections=projection_number)
434 
435  # Have our dataset point to that in the original PMI restraint
436  def __get_dataset(self):
437  return self.pmi_restraint.datasets[self.image_number]
438  def __set_dataset(self, val):
439  self.pmi_restraint.datasets[self.image_number] = val
440  dataset = property(__get_dataset, __set_dataset)
441 
442  def add_fits_from_model_statfile(self, model):
443  ccc = self._get_cross_correlation(model)
444  transform = self._get_transformation(model)
445  rot = transform.get_rotation()
446  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
447  self.fits[model] = ihm.restraint.EM2DRestraintFit(
448  cross_correlation_coefficient=ccc,
449  rot_matrix=rm,
450  tr_vector=transform.get_translation())
451 
452  def _get_transformation(self, model):
453  """Get the transformation that places the model on the image"""
454  stats = model.em2d_stats or model.stats
455  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
456  self.image_number + 1)
457  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
458  t = [float(stats[prefix + '_Translation%d' % i])
459  for i in range(3)]
460  # If the model coordinates are transformed, need to back transform
461  # them first
462  inv = model.transform.get_inverse()
464  IMP.algebra.Vector3D(*t)) * inv
465 
466  def _get_cross_correlation(self, model):
467  """Get the cross correlation coefficient between the model projection
468  and the image"""
469  stats = model.em2d_stats or model.stats
470  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
471  % (self.pmi_restraint.label,
472  self.image_number + 1)])
473 
474 
475 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
476 
477  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
478  self.pmi_restraint = pmi_restraint
479  super(_EM3DRestraint, self).__init__(
480  dataset=pmi_restraint.dataset,
481  assembly=self._get_assembly(densities, simo, state),
482  fitting_method='Gaussian mixture models',
483  number_of_gaussians=len(target_ps))
484 
485  # Have our dataset point to that in the original PMI restraint
486  def __set_dataset(self, val):
487  self.pmi_restraint.dataset = val
488  dataset = property(lambda self: self.pmi_restraint.dataset,
489  __set_dataset)
490 
491  def _get_assembly(self, densities, simo, state):
492  """Get the Assembly that this restraint acts on"""
493  cm = _ComponentMapper(state.prot)
494  components = {}
495  for d in densities:
496  components[cm[d]] = None # None == all residues in this component
497  a = simo._get_subassembly(components,
498  name="EM subassembly",
499  description="All components that fit the EM map")
500  return a
501 
502  def add_fits_from_model_statfile(self, model):
503  ccc = self._get_cross_correlation(model)
504  self.fits[model] = ihm.restraint.EM3DRestraintFit(
505  cross_correlation_coefficient=ccc)
506 
507  def _get_cross_correlation(self, model):
508  """Get the cross correlation coefficient between the model
509  and the map"""
510  if model.stats is not None:
511  return float(model.stats['GaussianEMRestraint_%s_CCC'
512  % self.pmi_restraint.label])
513 
514 
515 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
516 
517  def __init__(self, simo, state, pmi_restraint, geometric_object,
518  feature, distance, sigma):
519  self.pmi_restraint = pmi_restraint
520  super(_GeometricRestraint, self).__init__(
521  dataset=pmi_restraint.dataset,
522  geometric_object=geometric_object, feature=feature,
523  distance=distance, harmonic_force_constant = 1. / sigma,
524  restrain_all=True)
525 
526  # Have our dataset point to that in the original PMI restraint
527  def __set_dataset(self, val):
528  self.pmi_restraint.dataset = val
529  dataset = property(lambda self: self.pmi_restraint.dataset,
530  __set_dataset)
531 
532 
533 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
534  def __init__(self, state, rex):
535  if rex.monte_carlo_sample_objects is not None:
536  method = 'Replica exchange monte carlo'
537  else:
538  method = 'Replica exchange molecular dynamics'
539  super(_ReplicaExchangeProtocolStep, self).__init__(
540  assembly=state.modeled_assembly,
541  dataset_group=None, # filled in by add_step()
542  method=method, name='Sampling',
543  num_models_begin=None, # filled in by add_step()
544  num_models_end=rex.vars["number_of_frames"],
545  multi_scale=True, multi_state=False, ordered=False,
546  ensemble=True)
547 
548 
549 class _SimpleProtocolStep(ihm.protocol.Step):
550  def __init__(self, state, num_models_end, method):
551  super(_SimpleProtocolStep, self).__init__(
552  assembly=state.modeled_assembly,
553  dataset_group=None, # filled in by add_step()
554  method=method, name='Sampling',
555  num_models_begin=None, # filled in by add_step()
556  num_models_end=num_models_end,
557  multi_scale=True, multi_state=False, ordered=False,
558  ensemble=True)
559 
560 
561 class _Chain(object):
562  """Represent a single chain in a Model"""
563  def __init__(self, pmi_chain_id, asym_unit):
564  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
565  self.spheres = []
566  self.atoms = []
567 
568  def add(self, xyz, atom_type, residue_type, residue_index,
569  all_indexes, radius):
570  if atom_type is None:
571  self.spheres.append((xyz, residue_type, residue_index,
572  all_indexes, radius))
573  else:
574  self.atoms.append((xyz, atom_type, residue_type, residue_index,
575  all_indexes, radius))
576  orig_comp = property(lambda self: self.comp)
577 
578 class _TransformedChain(object):
579  """Represent a chain that is a transformed version of another"""
580  def __init__(self, orig_chain, asym_unit, transform):
581  self.orig_chain, self.asym_unit = orig_chain, asym_unit
582  self.transform = transform
583 
584  def __get_spheres(self):
585  for (xyz, residue_type, residue_index, all_indexes,
586  radius) in self.orig_chain.spheres:
587  yield (self.transform * xyz, residue_type, residue_index,
588  all_indexes, radius)
589  spheres = property(__get_spheres)
590 
591  def __get_atoms(self):
592  for (xyz, atom_type, residue_type, residue_index, all_indexes,
593  radius) in self.orig_chain.atoms:
594  yield (self.transform * xyz, atom_type, residue_type, residue_index,
595  all_indexes, radius)
596  atoms = property(__get_atoms)
597 
598  entity = property(lambda self: self.orig_chain.entity)
599  orig_comp = property(lambda self: self.orig_chain.comp)
600 
601 class _Excluder(object):
602  def __init__(self, component, simo):
603  self._seqranges = simo._exclude_coords.get(component, [])
604 
605  def is_excluded(self, indexes):
606  """Return True iff the given sequence range is excluded."""
607  for seqrange in self._seqranges:
608  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
609  return True
610 
611 
612 class _Model(ihm.model.Model):
613  def __init__(self, prot, simo, protocol, assembly, representation):
614  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
615  representation=representation)
616  self.simo = weakref.proxy(simo)
617  # Transformation from IMP coordinates into mmCIF coordinate system.
618  # Normally we pass through coordinates unchanged, but in some cases
619  # we may want to translate them (e.g. Nup84, where the deposited PDB
620  # files are all centered; we want the mmCIF files to match)
622  self.em2d_stats = None
623  self.stats = None
624  # True iff restraints act on this model
625  self._is_restrained = True
626  o = self.output = IMP.pmi1.output.Output(atomistic=True)
627  name = 'cif-output'
628  self.m = prot.get_model()
629  o.dictionary_pdbs[name] = prot
630  o._init_dictchain(name, prot, multichar_chain=True)
631  (particle_infos_for_pdb,
632  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
633  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
634  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
635  self.rmsf = {}
636 
637  def all_chains(self, simo):
638  """Yield all chains, including transformed ones"""
639  chain_for_comp = {}
640  for c in self.chains:
641  yield c
642  chain_for_comp[c.comp] = c
643  for tc in simo._transformed_components:
644  orig_chain = chain_for_comp.get(tc.original, None)
645  if orig_chain:
646  asym = simo.asym_units[tc.name]
647  c = _TransformedChain(orig_chain, asym, tc.transform)
648  c.comp = tc.name
649  yield c
650 
651  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
652  entity_for_chain = {}
653  comp_for_chain = {}
654  correct_asym = {}
655  for protname, chain_id in o.dictchain[name].items():
656  entity_for_chain[chain_id] = simo.entities[protname]
657  comp_for_chain[chain_id] = protname
658  # When doing multi-state modeling, the chain ID returned here
659  # (assigned sequentially) might not be correct (states may have
660  # gaps in the chain IDs). Map it to the correct asym unit.
661  correct_asym[chain_id] = simo.asym_units[protname]
662 
663  # Gather by chain ID (should be sorted by chain ID already)
664  self.chains = []
665  chain = None
666  excluder = None
667 
668  for (xyz, atom_type, residue_type, chain_id, residue_index,
669  all_indexes, radius) in particle_infos_for_pdb:
670  if chain is None or chain.pmi_chain_id != chain_id:
671  chain = _Chain(chain_id, correct_asym[chain_id])
672  chain.entity = entity_for_chain[chain_id]
673  chain.comp = comp_for_chain[chain_id]
674  self.chains.append(chain)
675  excluder = _Excluder(chain.comp, simo)
676  if not excluder.is_excluded(all_indexes if all_indexes
677  else [residue_index]):
678  chain.add(xyz, atom_type, residue_type, residue_index,
679  all_indexes, radius)
680 
681  def parse_rmsf_file(self, fname, component):
682  self.rmsf[component] = rmsf = {}
683  with open(fname) as fh:
684  for line in fh:
685  resnum, blocknum, val = line.split()
686  rmsf[int(resnum)] = (int(blocknum), float(val))
687 
688  def get_rmsf(self, component, indexes):
689  """Get the RMSF value for the given residue indexes."""
690  if not self.rmsf:
691  return None
692  rmsf = self.rmsf[component]
693  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
694  if len(blocknums) != 1:
695  raise ValueError("Residue indexes %s aren't all in the same block"
696  % str(indexes))
697  return rmsf[indexes[0]][1]
698 
699  def get_atoms(self):
700  for chain in self.all_chains(self.simo):
701  pmi_offset = chain.asym_unit.entity.pmi_offset
702  for atom in chain.atoms:
703  (xyz, atom_type, residue_type, residue_index,
704  all_indexes, radius) = atom
705  pt = self.transform * xyz
706  yield ihm.model.Atom(asym_unit=chain.asym_unit,
707  seq_id=residue_index - pmi_offset,
708  atom_id=atom_type.get_string(),
709  type_symbol=None, # todo: get element
710  x=pt[0], y=pt[1], z=pt[2])
711 
712  def get_spheres(self):
713  for chain in self.all_chains(self.simo):
714  pmi_offset = chain.asym_unit.entity.pmi_offset
715  for sphere in chain.spheres:
716  (xyz, residue_type, residue_index,
717  all_indexes, radius) = sphere
718  if all_indexes is None:
719  all_indexes = (residue_index,)
720  pt = self.transform * xyz
721  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
722  seq_id_range=(all_indexes[0] - pmi_offset,
723  all_indexes[-1] - pmi_offset),
724  x=pt[0], y=pt[1], z=pt[2], radius=radius,
725  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
726 
727 
728 class _AllProtocols(object):
729  def __init__(self, simo):
730  self.simo = weakref.proxy(simo)
731  # Lists of Protocols by state
732  self.protocols = OrderedDict()
733 
734  def add_protocol(self, state):
735  """Add a new Protocol"""
736  if state not in self.protocols:
737  self.protocols[state] = []
738  p = ihm.protocol.Protocol()
739  self.simo.system.orphan_protocols.append(p)
740  self.protocols[state].append(p)
741 
742  def add_step(self, step, state):
743  """Add a ProtocolStep to the last Protocol of the given State"""
744  if state not in self.protocols:
745  self.add_protocol(state)
746  protocol = self.get_last_protocol(state)
747  if len(protocol.steps) == 0:
748  step.num_models_begin = 0
749  else:
750  step.num_models_begin = protocol.steps[-1].num_models_end
751  protocol.steps.append(step)
752  step.id = len(protocol.steps)
753  # Assume that protocol uses all currently-defined datasets
754  step.dataset_group = self.simo.all_datasets.get_all_group(state)
755 
756  def add_postproc(self, step, state):
757  """Add a postprocessing step to the last protocol"""
758  protocol = self.get_last_protocol(state)
759  if len(protocol.analyses) == 0:
760  protocol.analyses.append(ihm.analysis.Analysis())
761  protocol.analyses[-1].steps.append(step)
762 
763  def get_last_protocol(self, state):
764  """Return the most recently-added _Protocol"""
765  return self.protocols[state][-1]
766 
767 
768 class _AllStartingModels(object):
769  def __init__(self, simo):
770  self.simo = simo
771  # dict of starting models (entire PDB files), collected from fragments,
772  # ordered by component name and state
773  self.models = OrderedDict()
774  self.output = IMP.pmi1.output.Output()
775 
776  def add_pdb_fragment(self, fragment):
777  """Add a starting model PDB fragment."""
778  comp = fragment.component
779  state = fragment.state
780  if comp not in self.models:
781  self.models[comp] = OrderedDict()
782  if state not in self.models[comp]:
783  self.models[comp][state] = []
784  models = self.models[comp][state]
785  if len(models) == 0 \
786  or models[-1].fragments[0].pdbname != fragment.pdbname:
787  model = self._add_model(fragment)
788  models.append(model)
789  else:
790  # Break circular ref between fragment and model
791  models[-1].fragments.append(weakref.proxy(fragment))
792  # Update residue range to cover all fragments
793  pmi_offset = models[-1].asym_unit.entity.pmi_offset
794  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
795  models[-1].asym_unit.seq_id_range[0])
796  sid_end = max(fragment.end + fragment.offset - pmi_offset,
797  models[-1].asym_unit.seq_id_range[1])
798  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
799  fragment.starting_model = models[-1]
800 
801  def _add_model(self, f):
802  parser = ihm.metadata.PDBParser()
803  r = parser.parse_file(f.pdbname)
804 
805  self.simo._add_dataset(r['dataset'])
806  # We only want the templates that model the starting model chain
807  templates = r['templates'].get(f.chain, [])
808  for t in templates:
809  if t.alignment_file:
810  self.simo.system.locations.append(t.alignment_file)
811  if t.dataset:
812  self.simo._add_dataset(t.dataset)
813  pmi_offset = f.asym_unit.entity.pmi_offset
814  m = _StartingModel(
815  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
816  dataset=r['dataset'], asym_id=f.chain,
817  templates=templates, offset=f.offset + pmi_offset,
818  metadata=r['metadata'],
819  software=r['software'][0] if r['software'] else None,
820  script_file=r['script'])
821  m.fragments = [weakref.proxy(f)]
822  return m
823 
824 
825 class _StartingModel(ihm.startmodel.StartingModel):
826  def get_seq_dif(self):
827  return self._seq_dif
828 
829  def get_atoms(self):
830  pmi_offset = self.asym_unit.entity.pmi_offset
831  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
832  self.asym_unit)
833  for f in self.fragments:
834  sel = IMP.atom.Selection(f.starting_hier,
835  residue_indexes=list(range(f.start - f.offset,
836  f.end - f.offset + 1)))
837  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
838  f.offset - pmi_offset):
839  yield a
840  self._seq_dif = mh._seq_dif
841 
842 
843 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
844  """Post processing using AnalysisReplicaExchange0 macro"""
845 
846  def __init__(self, rex, num_models_begin):
847  self.rex = rex
848  num_models_end = 0
849  for fname in self.get_all_stat_files():
850  with open(fname) as fh:
851  num_models_end += len(fh.readlines())
852  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
853  feature='RMSD', num_models_begin=num_models_begin,
854  num_models_end=num_models_end)
855 
856  def get_stat_file(self, cluster_num):
857  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
858  'stat.out')
859 
860  def get_all_stat_files(self):
861  for i in range(self.rex._number_of_clusters):
862  yield self.get_stat_file(i)
863 
864 
865 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
866  """Ensemble generated using AnalysisReplicaExchange0 macro"""
867 
868  num_models_deposited = None # Override base class property
869 
870  def __init__(self, pp, cluster_num, model_group, num_deposit):
871  with open(pp.get_stat_file(cluster_num)) as fh:
872  num_models = len(fh.readlines())
873  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
874  num_models=num_models,
875  model_group=model_group, post_process=pp,
876  clustering_feature=pp.feature,
877  name=model_group.name)
878  self.cluster_num = cluster_num
879  self.num_models_deposited = num_deposit
880 
881  def get_rmsf_file(self, component):
882  return os.path.join(self.post_process.rex._outputdir,
883  'cluster.%d' % self.cluster_num,
884  'rmsf.%s.dat' % component)
885 
886  def load_rmsf(self, model, component):
887  fname = self.get_rmsf_file(component)
888  if os.path.exists(fname):
889  model.parse_rmsf_file(fname, component)
890 
891  def get_localization_density_file(self, fname):
892  return os.path.join(self.post_process.rex._outputdir,
893  'cluster.%d' % self.cluster_num,
894  '%s.mrc' % fname)
895 
896  def load_localization_density(self, state, fname, select_tuple, asym_units):
897  fullpath = self.get_localization_density_file(fname)
898  if os.path.exists(fullpath):
899  details = "Localization density for %s %s" \
900  % (fname, self.model_group.name)
901  local_file = ihm.location.OutputFileLocation(fullpath,
902  details=details)
903  for s in select_tuple:
904  if isinstance(s, tuple) and len(s) == 3:
905  asym = asym_units[s[2]].pmi_range(s[0], s[1])
906  else:
907  asym = asym_units[s]
908  den = ihm.model.LocalizationDensity(file=local_file,
909  asym_unit=asym)
910  self.densities.append(den)
911 
912  def load_all_models(self, simo, state):
913  stat_fname = self.post_process.get_stat_file(self.cluster_num)
914  model_num = 0
915  with open(stat_fname) as fh:
916  stats = ast.literal_eval(fh.readline())
917  # Correct path
918  rmf_file = os.path.join(os.path.dirname(stat_fname),
919  "%d.rmf3" % model_num)
920  for c in state.all_modeled_components:
921  # todo: this only works with PMI 1
922  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
923  force_rigid_update=True)
924  # todo: fill in other data from stat file, e.g. crosslink phi/psi
925  yield stats
926  model_num += 1
927  if model_num >= self.num_models_deposited:
928  return
929 
930  # todo: also support dRMS precision
931  def _get_precision(self):
932  precfile = os.path.join(self.post_process.rex._outputdir,
933  "precision.%d.%d.out" % (self.cluster_num,
934  self.cluster_num))
935  if not os.path.exists(precfile):
936  return ihm.unknown
937  # Fail if the precision.x.x.out file doesn't match the cluster
938  r = re.compile(
939  r'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
940  % self.cluster_num)
941  with open(precfile) as fh:
942  for line in fh:
943  m = r.match(line)
944  if m:
945  return float(m.group(1))
946 
947  precision = property(lambda self: self._get_precision(),
948  lambda self, val: None)
949 
950 class _SimpleEnsemble(ihm.model.Ensemble):
951  """Simple manually-created ensemble"""
952 
953  num_models_deposited = None # Override base class property
954 
955  def __init__(self, pp, model_group, num_models, drmsd,
956  num_models_deposited, ensemble_file):
957  super(_SimpleEnsemble, self).__init__(
958  model_group=model_group, post_process=pp, num_models=num_models,
959  file=ensemble_file, precision=drmsd, name=model_group.name,
960  clustering_feature='dRMSD')
961  self.num_models_deposited = num_models_deposited
962 
963  def load_localization_density(self, state, component, asym, local_file):
964  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
965  self.densities.append(den)
966 
967 
968 class _EntityMapper(dict):
969  """Handle mapping from IMP components to CIF entities.
970  Multiple components may map to the same entity if they share sequence."""
971  def __init__(self, system):
972  super(_EntityMapper, self).__init__()
973  self._sequence_dict = {}
974  self._entities = []
975  self.system = system
976 
977  def add(self, component_name, sequence, offset):
978  def entity_seq(sequence):
979  # Map X to UNK
980  if 'X' in sequence:
981  return ['UNK' if s == 'X' else s for s in sequence]
982  else:
983  return sequence
984  if sequence not in self._sequence_dict:
985  # Use the name of the first component, stripped of any copy number,
986  # as the description of the entity
987  d = component_name.split("@")[0].split(".")[0]
988  entity = Entity(entity_seq(sequence), description=d,
989  pmi_offset=offset)
990  self.system.entities.append(entity)
991  self._sequence_dict[sequence] = entity
992  self[component_name] = self._sequence_dict[sequence]
993 
994 
995 class _TransformedComponent(object):
996  def __init__(self, name, original, transform):
997  self.name, self.original, self.transform = name, original, transform
998 
999 
1000 class _SimpleRef(object):
1001  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1002  def __init__(self, ref):
1003  self.ref = ref
1004  def __call__(self):
1005  return self.ref
1006 
1007 
1008 class _State(ihm.model.State):
1009  """Representation of a single state in the system."""
1010 
1011  def __init__(self, pmi_object, po):
1012  # Point to the PMI object for this state. Use a weak reference
1013  # since the state object typically points to us too, so we need
1014  # to break the reference cycle. In PMI1 this will be a
1015  # Representation object; in PMI2 it is the PMI2 State object itself.
1016  self._pmi_object = weakref.proxy(pmi_object)
1017  if hasattr(pmi_object, 'state'):
1018  # Need a strong ref to pmi_object.state to prevent it from being
1019  # cleaned up when doing PMI1 multistate modeling
1020  self._pmi_state = _SimpleRef(pmi_object.state)
1021  else:
1022  self._pmi_state = weakref.ref(pmi_object)
1023  # Preserve PMI state name
1024  old_name = self.name
1025  super(_State, self).__init__(experiment_type='Fraction of bulk')
1026  self.name = old_name
1027 
1028  # The assembly of all components modeled by IMP in this state.
1029  # This may be smaller than the complete assembly.
1030  self.modeled_assembly = ihm.Assembly(
1031  name="Modeled assembly",
1032  description=self.get_postfixed_name(
1033  "All components modeled by IMP"))
1034  po.system.orphan_assemblies.append(self.modeled_assembly)
1035 
1036  self.all_modeled_components = []
1037 
1038  def __hash__(self):
1039  return hash(self._pmi_state())
1040  def __eq__(self, other):
1041  return self._pmi_state() == other._pmi_state()
1042 
1043  def add_model_group(self, group):
1044  self.append(group)
1045  return group
1046 
1047  def get_prefixed_name(self, name):
1048  """Prefix the given name with the state name, if available."""
1049  if self.short_name:
1050  return self.short_name + ' ' + name
1051  else:
1052  # Can't use capitalize() since that un-capitalizes everything
1053  # but the first letter
1054  return name[0].upper() + name[1:] if name else ''
1055 
1056  def get_postfixed_name(self, name):
1057  """Postfix the given name with the state name, if available."""
1058  if self.short_name:
1059  return "%s in state %s" % (name, self.short_name)
1060  else:
1061  return name
1062 
1063  short_name = property(lambda self: self._pmi_state().short_name)
1064  long_name = property(lambda self: self._pmi_state().long_name)
1065  def __get_name(self):
1066  return self._pmi_state().long_name
1067  def __set_name(self, val):
1068  self._pmi_state().long_name = val
1069  name = property(__get_name, __set_name)
1070 
1071 
1072 class Entity(ihm.Entity):
1073  """A single entity in the system.
1074  This functions identically to the base ihm.Entity class, but it
1075  allows identifying residues by either the IHM numbering scheme
1076  (seq_id, which is always contiguous starting at 1) or by the PMI
1077  scheme (which is similar, but may not start at 1 if the sequence in
1078  the FASTA file has one or more N-terminal gaps"""
1079  def __init__(self, sequence, pmi_offset, *args, **keys):
1080  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1081  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1082  self.pmi_offset = pmi_offset
1083  super(Entity, self).__init__(sequence, *args, **keys)
1084 
1085  def pmi_residue(self, res_id):
1086  """Return a single IHM residue indexed using PMI numbering"""
1087  return self.residue(res_id - self.pmi_offset)
1088 
1089  def pmi_range(self, res_id_begin, res_id_end):
1090  """Return a range of IHM residues indexed using PMI numbering"""
1091  off = self.pmi_offset
1092  return self(res_id_begin - off, res_id_end - off)
1093 
1094 
1095 class AsymUnit(ihm.AsymUnit):
1096  """A single asymmetric unit in the system.
1097  This functions identically to the base ihm.AsymUnit class, but it
1098  allows identifying residues by either the IHM numbering scheme
1099  (seq_id, which is always contiguous starting at 1) or by the PMI
1100  scheme (which is similar, but may not start at 1 if the sequence in
1101  the FASTA file has one or more N-terminal gaps"""
1102 
1103  def __init__(self, entity, *args, **keys):
1104  super(AsymUnit, self).__init__(
1105  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1106 
1107  def pmi_residue(self, res_id):
1108  """Return a single IHM residue indexed using PMI numbering"""
1109  return self.residue(res_id - self.entity.pmi_offset)
1110 
1111  def pmi_range(self, res_id_begin, res_id_end):
1112  """Return a range of IHM residues indexed using PMI numbering"""
1113  off = self.entity.pmi_offset
1114  return self(res_id_begin - off, res_id_end - off)
1115 
1116 
1118  """Class to encode a modeling protocol as mmCIF.
1119 
1120  IMP has basic support for writing out files in mmCIF format, for
1121  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1122  After creating an instance of this class, attach it to an
1123  IMP.pmi1.representation.Representation object. After this, any
1124  generated models and metadata are automatically collected and
1125  output as mmCIF.
1126  """
1127  def __init__(self, fh):
1128  # Ultimately, collect data in an ihm.System object
1129  self.system = ihm.System()
1130  self._state_group = ihm.model.StateGroup()
1131  self.system.state_groups.append(self._state_group)
1132 
1133  self.fh = fh
1134  self._state_ensemble_offset = 0
1135  self._each_metadata = [] # list of metadata for each representation
1136  self._file_datasets = []
1137  self._main_script = os.path.abspath(sys.argv[0])
1138 
1139  # Point to the main modeling script
1140  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1141  details="The main integrative modeling script")
1142  self.system.locations.append(loc)
1143 
1144  self._states = {}
1145  self.__asym_states = {}
1146  self._working_directory = os.getcwd()
1147  self.default_representation = self.create_representation(
1148  "Default representation")
1149  self.entities = _EntityMapper(self.system)
1150  # Mapping from component names to ihm.AsymUnit
1151  self.asym_units = {}
1152  self._all_components = {}
1153  self.all_modeled_components = []
1154  self._transformed_components = []
1155  self.sequence_dict = {}
1156 
1157  # Common geometric objects used in PMI systems
1158  self._xy_plane = ihm.geometry.XYPlane()
1159  self._xz_plane = ihm.geometry.XZPlane()
1160  self._z_axis = ihm.geometry.ZAxis()
1161  self._center_origin = ihm.geometry.Center(0,0,0)
1162  self._identity_transform = ihm.geometry.Transformation.identity()
1163 
1164  # Coordinates to exclude
1165  self._exclude_coords = {}
1166 
1167  self.all_representations = _AllModelRepresentations(self)
1168  self.all_protocols = _AllProtocols(self)
1169  self.all_datasets = _AllDatasets(self.system)
1170  self.all_starting_models = _AllStartingModels(self)
1171 
1172  self.all_software = _AllSoftware(self.system)
1173 
1174  def create_representation(self, name):
1175  """Create a new Representation and return it. This can be
1176  passed to add_model(), add_bead_element() or add_pdb_element()."""
1177  r = ihm.representation.Representation()
1178  self.system.orphan_representations.append(r)
1179  return r
1180 
1181  def exclude_coordinates(self, component, seqrange):
1182  """Don't record coordinates for the given domain.
1183  Coordinates for the given domain (specified by a component name
1184  and a 2-element tuple giving the start and end residue numbers)
1185  will be excluded from the mmCIF file. This can be used to exclude
1186  parts of the structure that weren't well resolved in modeling.
1187  Any bead or residue that lies wholly within this range will be
1188  excluded. Multiple ranges for a given component can be excluded
1189  by calling this method multiple times."""
1190  if component not in self._exclude_coords:
1191  self._exclude_coords[component] = []
1192  self._exclude_coords[component].append(seqrange)
1193 
1194  def _is_excluded(self, component, start, end):
1195  """Return True iff this chunk of sequence should be excluded"""
1196  for seqrange in self._exclude_coords.get(component, ()):
1197  if start >= seqrange[0] and end <= seqrange[1]:
1198  return True
1199 
1200  def _add_state(self, state):
1201  """Create a new state and return a pointer to it."""
1202  self._state_ensemble_offset = len(self.system.ensembles)
1203  s = _State(state, self)
1204  self._state_group.append(s)
1205  self._last_state = s
1206  return s
1207 
1208  def get_file_dataset(self, fname):
1209  for d in self._file_datasets:
1210  fd = d.get(os.path.abspath(fname), None)
1211  if fd:
1212  return fd
1213 
1214  def _get_chain_for_component(self, name, output):
1215  """Get the chain ID for a component, if any."""
1216  # todo: handle multiple copies
1217  if name in self.asym_units:
1218  return self.asym_units[name]._id
1219  else:
1220  # A non-modeled component doesn't have a chain ID
1221  return None
1222 
1223  def _get_assembly_comps(self, assembly):
1224  """Get the names of the components in the given assembly"""
1225  # component name = asym_unit.details
1226  comps = {}
1227  for ca in assembly:
1228  comps[ca.details] = None
1229  return comps
1230 
1231  def create_transformed_component(self, state, name, original, transform):
1232  """Make a new component that's a transformed copy of another.
1233  All representation for the existing component is copied to the
1234  new one."""
1235  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1236  if name in assembly_comps:
1237  raise ValueError("Component %s already exists" % name)
1238  elif original not in assembly_comps:
1239  raise ValueError("Original component %s does not exist" % original)
1240  self.create_component(state, name, True)
1241  self.add_component_sequence(state, name, self.sequence_dict[original])
1242  self._transformed_components.append(_TransformedComponent(
1243  name, original, transform))
1244  self.all_representations.copy_component(state, name, original,
1245  self.asym_units[name])
1246 
1247  def create_component(self, state, name, modeled):
1248  new_comp = name not in self._all_components
1249  self._all_components[name] = None
1250  if modeled:
1251  state.all_modeled_components.append(name)
1252  if new_comp:
1253  self.asym_units[name] = None # assign asym once we get sequence
1254  self.all_modeled_components.append(name)
1255 
1256  def add_component_sequence(self, state, name, seq):
1257  def get_offset(seq):
1258  # Count length of gaps at start of sequence
1259  for i in range(len(seq)):
1260  if seq[i] != '-':
1261  return seq[i:], i
1262  seq, offset = get_offset(seq)
1263  if name in self.sequence_dict:
1264  if self.sequence_dict[name] != seq:
1265  raise ValueError("Sequence mismatch for component %s" % name)
1266  else:
1267  self.sequence_dict[name] = seq
1268  self.entities.add(name, seq, offset)
1269  if name in self.asym_units:
1270  if self.asym_units[name] is None:
1271  # Set up a new asymmetric unit for this component
1272  entity = self.entities[name]
1273  asym = AsymUnit(entity, details=name)
1274  self.system.asym_units.append(asym)
1275  self.asym_units[name] = asym
1276  state.modeled_assembly.append(self.asym_units[name])
1277 
1278  def _add_restraint_model_fits(self):
1279  """Add fits to restraints for all known models"""
1280  for group, m in self.system._all_models():
1281  if m._is_restrained:
1282  for r in self.system.restraints:
1283  if hasattr(r, 'add_fits_from_model_statfile'):
1284  r.add_fits_from_model_statfile(m)
1285 
1286  def _add_em2d_raw_micrographs(self):
1287  """If the deprecated metadata.EMMicrographsDataset class was used,
1288  transfer its data to the EM2D restraint."""
1289  for r in self.system.restraints:
1290  if isinstance(r, _EM2DRestraint):
1291  for d in r.dataset.parents:
1292  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1293  r.number_raw_micrographs = d.number
1294  if isinstance(d.location,
1295  IMP.pmi1.metadata.FileLocation):
1296  d.location.content_type = \
1297  ihm.location.InputFileLocation.content_type
1298 
1299  def flush(self):
1300  self.finalize()
1301  ihm.dumper.write(self.fh, [self.system])
1302 
1303  def finalize(self):
1304  """Do any final processing on the class hierarchy.
1305  After calling this method, the `system` member (an instance
1306  of `ihm.System`) completely reproduces the PMI modeling, and
1307  can be written out to an mmCIF file with `ihm.dumper.write`,
1308  and/or modified using the ihm API."""
1309  self._add_restraint_model_fits()
1310 
1311  self._add_em2d_raw_micrographs()
1312 
1313  # Add metadata to ihm.System
1314  self.system.software.extend(m for m in self._metadata
1315  if isinstance(m, ihm.Software))
1316  self.system.citations.extend(m for m in self._metadata
1317  if isinstance(m, ihm.Citation))
1318  self.system.locations.extend(m for m in self._metadata
1319  if isinstance(m, ihm.location.FileLocation))
1320 
1321  # Point all locations to repos, if applicable
1322  all_repos = [m for m in self._metadata
1323  if isinstance(m, ihm.location.Repository)]
1324  self.system.update_locations_in_repositories(all_repos)
1325 
1326  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1327  chain, hier, representation=None):
1328  if self._is_excluded(name, start, end):
1329  return
1330  if representation is None:
1331  representation = self.default_representation
1332  asym = self.asym_units[name]
1333  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1334  hier, asym)
1335  self.all_representations.add_fragment(state, representation, p)
1336  self.all_starting_models.add_pdb_fragment(p)
1337 
1338  def add_bead_element(self, state, name, start, end, num, hier,
1339  representation=None):
1340  if self._is_excluded(name, start, end):
1341  return
1342  if representation is None:
1343  representation = self.default_representation
1344  asym = self.asym_units[name]
1345  pmi_offset = asym.entity.pmi_offset
1346  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1347  num, hier, asym)
1348  self.all_representations.add_fragment(state, representation, b)
1349 
1350  def get_cross_link_group(self, pmi_restraint):
1351  r = _CrossLinkRestraint(pmi_restraint)
1352  self.system.restraints.append(r)
1353  self._add_restraint_dataset(r) # so that all-dataset group works
1354  return r
1355 
1356  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1357  if c1 not in self._all_components or c2 not in self._all_components:
1358  # Crosslink refers to a component we didn't model
1359  # As a quick hack, just ignore it.
1360  # todo: need to add an entity for this anyway (so will need the
1361  # sequence, and add to struct_assembly)
1362  return None
1363  e1 = self.entities[c1]
1364  e2 = self.entities[c2]
1365  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1366  residue2=e2.pmi_residue(r2))
1367  rsr.experimental_cross_links.append([xl])
1368  return xl
1369 
1370  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1371  psi_p, rsr):
1372  # Map p1/p2 to asym units
1373  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1374  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1375 
1376  if _get_by_residue(p1) and _get_by_residue(p2):
1377  cls = _ResidueCrossLink
1378  else:
1379  cls = _FeatureCrossLink
1380  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1381  restrain_all=False)
1382  # Needed to look up fits later
1383  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1384  rsr.cross_links.append(xl)
1385 
1386  def add_replica_exchange(self, state, rex):
1387  # todo: allow for metadata to say how many replicas were used in the
1388  # actual experiment, and how many independent runs were carried out
1389  # (use these as multipliers to get the correct total number of
1390  # output models)
1391  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1392  state)
1393 
1394  def _add_simple_dynamics(self, num_models_end, method):
1395  # Always assumed that we're dealing with the last state
1396  state = self._last_state
1397  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1398  method), state)
1399 
1400  def _add_protocol(self):
1401  # Always assumed that we're dealing with the last state
1402  state = self._last_state
1403  self.all_protocols.add_protocol(state)
1404 
1405  def _add_dataset(self, dataset):
1406  return self.all_datasets.add(self._last_state, dataset)
1407 
1408  def _add_restraint_dataset(self, restraint):
1409  return self.all_datasets.add_restraint(self._last_state, restraint)
1410 
1411  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1412  # Always assumed that we're dealing with the last state
1413  state = self._last_state
1414  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1415  self.all_protocols.add_postproc(pp, state)
1416  return pp
1417 
1418  def _add_no_postprocessing(self, num_models):
1419  # Always assumed that we're dealing with the last state
1420  state = self._last_state
1421  pp = ihm.analysis.EmptyStep()
1422  pp.num_models_begin = pp.num_models_end = num_models
1423  self.all_protocols.add_postproc(pp, state)
1424  return pp
1425 
1426  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1427  num_models_deposited, localization_densities,
1428  ensemble_file):
1429  """Add an ensemble generated by ad hoc methods (not using PMI).
1430  This is currently only used by the Nup84 system."""
1431  # Always assumed that we're dealing with the last state
1432  state = self._last_state
1433  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1434  state.add_model_group(group)
1435  if ensemble_file:
1436  # Deprecated Location class does not state input vs output
1437  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1438  ensemble_file.content_type = \
1439  ihm.location.OutputFileLocation.content_type
1440  self.system.locations.append(ensemble_file)
1441  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1442  ensemble_file)
1443  self.system.ensembles.append(e)
1444  for c in state.all_modeled_components:
1445  den = localization_densities.get(c, None)
1446  if den:
1447  # Deprecated Location class does not state input vs output
1448  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1449  den.content_type = \
1450  ihm.location.OutputFileLocation.content_type
1451  e.load_localization_density(state, c, self.asym_units[c], den)
1452  return e
1453 
1454  def set_ensemble_file(self, i, location):
1455  """Point a previously-created ensemble to an 'all-models' file.
1456  This could be a trajectory such as DCD, an RMF, or a multimodel
1457  PDB file."""
1458  # Deprecated Location class does not state input vs output
1459  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1460  location.content_type = ihm.location.OutputFileLocation.content_type
1461  self.system.locations.append(location)
1462  # Ensure that we point to an ensemble related to the current state
1463  ind = i + self._state_ensemble_offset
1464  self.system.ensembles[ind].file = location
1465 
1466  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1467  # todo: add prefilter as an additional postprocess step (complication:
1468  # we don't know how many models it filtered)
1469  # todo: postpone rmsf/density localization extraction until after
1470  # relevant methods are called (currently it is done from the
1471  # constructor)
1472  protocol = self.all_protocols.get_last_protocol(state)
1473  num_models = protocol.steps[-1].num_models_end
1474  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1475  self.all_protocols.add_postproc(pp, state)
1476  for i in range(rex._number_of_clusters):
1477  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1478  'cluster %d' % (i + 1)))
1479  state.add_model_group(group)
1480  # todo: make # of models to deposit configurable somewhere
1481  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1482  self.system.ensembles.append(e)
1483  # Add localization density info if available
1484  for fname, stuple in sorted(density_custom_ranges.items()):
1485  e.load_localization_density(state, fname, stuple,
1486  self.asym_units)
1487  for stats in e.load_all_models(self, state):
1488  m = self.add_model(group)
1489  # Since we currently only deposit 1 model, it is the
1490  # best scoring one
1491  m.name = 'Best scoring model'
1492  m.stats = stats
1493  # Add RMSF info if available
1494  for c in state.all_modeled_components:
1495  e.load_rmsf(m, c)
1496 
1497  def _get_subassembly(self, comps, name, description):
1498  """Get an Assembly consisting of the given components.
1499  `compdict` is a dictionary of the components to add, where keys
1500  are the component names and values are the sequence ranges (or
1501  None to use all residues in the component)."""
1502  asyms = []
1503  for comp, seqrng in comps.items():
1504  a = self.asym_units[comp]
1505  asyms.append(a if seqrng is None else a(*seqrng))
1506 
1507  a = ihm.Assembly(asyms, name=name, description=description)
1508  return a
1509 
1510  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1511  details):
1512  """Add a basic FoXS fit. This is largely intended for use from the
1513  NPC application."""
1514  assembly = self._get_subassembly({comp:seqrange},
1515  name="SAXS subassembly",
1516  description="All components that fit SAXS data")
1517  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1518  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1519  multi_state=False, radius_of_gyration=rg, details=details)
1520  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1521  self.system.restraints.append(r)
1522  self._add_restraint_dataset(r) # so that all-dataset group works
1523 
1524  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1525  image_resolution, projection_number,
1526  micrographs_number):
1527  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1528  image_resolution, projection_number,
1529  micrographs_number)
1530  self.system.restraints.append(r)
1531  self._add_restraint_dataset(r) # so that all-dataset group works
1532 
1533  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1534  # todo: need to set allow_duplicates on this dataset?
1535  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1536  self.system.restraints.append(r)
1537  self._add_restraint_dataset(r) # so that all-dataset group works
1538 
1539  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1540  sigma, pmi_restraint):
1541  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1542  sigma, pmi_restraint, self._xy_plane)
1543 
1544  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1545  sigma, pmi_restraint):
1546  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1547  sigma, pmi_restraint, self._xz_plane)
1548 
1549  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1550  sigma, pmi_restraint):
1551  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1552  sigma, pmi_restraint, self._z_axis)
1553 
1554  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1555  sigma, pmi_restraint, geom):
1556  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1557  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1558  asym.get_feature(ps),
1559  ihm.restraint.LowerUpperBoundDistanceRestraint(
1560  lower_bound, upper_bound),
1561  sigma)
1562  self.system.restraints.append(r)
1563  self._add_restraint_dataset(r) # so that all-dataset group works
1564 
1565  def _get_membrane(self, tor_R, tor_r, tor_th):
1566  """Get an object representing a half-torus membrane"""
1567  if not hasattr(self, '_seen_membranes'):
1568  self._seen_membranes = {}
1569  # If a membrane already exists with all dimensions within 0.01 of
1570  # this one, reuse it
1571  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1572  if membrane_id not in self._seen_membranes:
1573  m = ihm.geometry.HalfTorus(center=self._center_origin,
1574  transformation=self._identity_transform,
1575  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1576  inner=True, name='Membrane')
1577  self._seen_membranes[membrane_id] = m
1578  return self._seen_membranes[membrane_id]
1579 
1580  def add_membrane_surface_location_restraint(
1581  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1582  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1583  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1584 
1585  def add_membrane_exclusion_restraint(
1586  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1587  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1588  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1589 
1590  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1591  sigma, pmi_restraint, rsr):
1592  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1593  r = _GeometricRestraint(self, state, pmi_restraint,
1594  self._get_membrane(tor_R, tor_r, tor_th),
1595  asym.get_feature(ps), rsr, sigma)
1596  self.system.restraints.append(r)
1597  self._add_restraint_dataset(r) # so that all-dataset group works
1598 
1599  def add_model(self, group, assembly=None, representation=None):
1600  state = self._last_state
1601  if representation is None:
1602  representation = self.default_representation
1603  protocol = self.all_protocols.get_last_protocol(state)
1604  m = _Model(state.prot, self, protocol,
1605  assembly if assembly else state.modeled_assembly,
1606  representation)
1607  group.append(m)
1608  return m
1609 
1610  def _update_locations(self, filelocs):
1611  """Update FileLocation to point to a parent repository, if any"""
1612  all_repos = [m for m in self._metadata
1613  if isinstance(m, ihm.location.Repository)]
1614  for fileloc in filelocs:
1615  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1616 
1617  _metadata = property(lambda self:
1618  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:314
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:245
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1174
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1303
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1123
A single asymmetric unit in the system.
Definition: /mmcif.py:1099
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1184
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1085
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:749
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1454
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1089
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1107
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1231
3D rotation class.
Definition: Rotation3D.h:52
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1072
VectorD< 3 > Vector3D
Definition: VectorD.h:408
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1111
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:256
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...