IMP logo
IMP Reference Guide  2.16.0
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi1.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping or adjoining ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rngs.append(asym(rind, rind))
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rngs.append(asym(rinds[0], rinds[-1]))
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  return ihm.restraint.ResidueFeature(rngs)
103 
104 
105 class _AllSoftware(object):
106  def __init__(self, system):
107  self.system = system
108  self.modeller_used = self.phyre2_used = False
109  imp = ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org')
115  pmi = ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi1.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')
121  self.system.software.extend([imp, pmi])
122  # Only recent versions of python-ihm support adding citations for
123  # software
124  if hasattr(imp, 'citation'):
125  if sys.version_info[0] > 2:
126  # Don't include UTF8 characters in source; it confuses Python 2
127  javi = 'Vel\u00e1zquez-Muriel J'
128  else:
129  javi = 'Velazquez-Muriel J'
130  imp.citation = ihm.Citation(
131  pmid='22272186',
132  title='Putting the pieces together: integrative modeling '
133  'platform software for structure determination of '
134  'macromolecular assemblies',
135  journal='PLoS Biol', volume=10, page_range='e1001244',
136  year=2012,
137  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
138  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
139  doi='10.1371/journal.pbio.1001244')
140  pmi.citation = ihm.Citation(
141  pmid='31396911',
142  title='Modeling Biological Complexes Using Integrative '
143  'Modeling Platform.',
144  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
145  year=2019,
146  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
147  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
148  'Sali A'],
149  doi='10.1007/978-1-4939-9608-7_15')
150 
151  def set_modeller_used(self, version, date):
152  if self.modeller_used:
153  return
154  self.modeller_used = True
155  s = ihm.Software(
156  name='MODELLER', classification='comparative modeling',
157  description='Comparative modeling by satisfaction '
158  'of spatial restraints, build ' + date,
159  location='https://salilab.org/modeller/', version=version)
160  self.system.software.append(s)
161  if hasattr(s, 'citation'):
162  s.citation = ihm.Citation(
163  pmid='8254673',
164  title='Comparative protein modelling by satisfaction of '
165  'spatial restraints.',
166  journal='J Mol Biol', volume=234, page_range=(779, 815),
167  year=1993, authors=['Sali A', 'Blundell TL'],
168  doi='10.1006/jmbi.1993.1626')
169 
170  def set_phyre2_used(self):
171  if self.phyre2_used:
172  return
173  self.phyre2_used = True
174  s = ihm.Software(
175  name='Phyre2', classification='protein homology modeling',
176  description='Protein Homology/analogY Recognition Engine V 2.0',
177  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
178  if hasattr(s, 'citation'):
179  s.citation = ihm.Citation(
180  pmid='25950237',
181  title='The Phyre2 web portal for protein modeling, '
182  'prediction and analysis.',
183  journal='Nat Protoc', volume=10, page_range=(845, 858),
184  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
185  'Sternberg MJ'],
186  year=2015, doi='10.1038/nprot.2015.053')
187  self.system.software.append(s)
188 
189 def _get_fragment_is_rigid(fragment):
190  """Determine whether a fragment is modeled rigidly"""
191  leaves = IMP.atom.get_leaves(fragment.hier)
192  # Assume all leaves are set up as rigid/flexible in the same way
193  return IMP.core.RigidMember.get_is_setup(leaves[0])
194 
195 class _PDBFragment(ihm.representation.ResidueSegment):
196  """Record details about part of a PDB file used as input
197  for a component."""
198  def __init__(self, state, component, start, end, pdb_offset,
199  pdbname, chain, hier, asym_unit):
200  # start, end are PMI residue indexes (not IHM)
201  super(_PDBFragment, self).__init__(
202  asym_unit=asym_unit.pmi_range(start, end),
203  rigid=None, primitive='sphere')
204  self.component, self.start, self.end, self.offset, self.pdbname \
205  = component, start, end, pdb_offset, pdbname
206  self.state, self.chain, self.hier = state, chain, hier
209  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
210 
211  rigid = property(lambda self: _get_fragment_is_rigid(self),
212  lambda self, val: None)
213 
214  def combine(self, other):
215  pass
216 
217 
218 class _BeadsFragment(ihm.representation.FeatureSegment):
219  """Record details about beads used to represent part of a component."""
220  chain = None
221  def __init__(self, state, component, start, end, count, hier, asym_unit):
222  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
223  rigid=None, primitive='sphere', count=count)
224  self.state, self.component, self.hier \
225  = state, component, hier
226 
227  rigid = property(lambda self: _get_fragment_is_rigid(self),
228  lambda self, val: None)
229 
230  def combine(self, other):
231  # todo: don't combine if one fragment is rigid and the other flexible
232  if type(other) == type(self) and \
233  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
234  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
235  other.asym_unit.seq_id_range[1])
236  self.count += other.count
237  return True
238 
239 
240 class _AllModelRepresentations(object):
241  def __init__(self, simo):
242  self.simo = simo
243  # dict of fragments, ordered by representation, then component name,
244  # then state
245  self.fragments = OrderedDict()
246  self._all_representations = {}
247 
248  def copy_component(self, state, name, original, asym_unit):
249  """Copy all representation for `original` in `state` to `name`"""
250  def copy_frag(f):
251  newf = copy.copy(f)
252  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
253  return newf
254  for rep in self.fragments:
255  if original in self.fragments[rep]:
256  if name not in self.fragments[rep]:
257  self.fragments[rep][name] = OrderedDict()
258  self.fragments[rep][name][state] = [copy_frag(f)
259  for f in self.fragments[rep][original][state]]
260  # Assume representation for a component is the same in all
261  # states, so only write out the first one
262  first_state = list(self.fragments[rep][name].keys())[0]
263  if state is first_state:
264  representation = self._all_representations[rep]
265  representation.extend(self.fragments[rep][name][state])
266 
267  def add_fragment(self, state, representation, fragment):
268  """Add a model fragment."""
269  comp = fragment.component
270  id_rep = id(representation)
271  self._all_representations[id_rep] = representation
272  if id_rep not in self.fragments:
273  self.fragments[id_rep] = OrderedDict()
274  if comp not in self.fragments[id_rep]:
275  self.fragments[id_rep][comp] = OrderedDict()
276  if state not in self.fragments[id_rep][comp]:
277  self.fragments[id_rep][comp][state] = []
278  fragments = self.fragments[id_rep][comp][state]
279  if len(fragments) == 0 or not fragments[-1].combine(fragment):
280  fragments.append(fragment)
281  # Assume representation for a component is the same in all states,
282  # so only write out the first one
283  first_state = list(self.fragments[id_rep][comp].keys())[0]
284  if state is first_state:
285  representation.append(fragment)
286 
287 
288 class _AllDatasets(object):
289  """Track all datasets generated by PMI and add them to the ihm.System"""
290  def __init__(self, system):
291  self.system = system
292  self._datasets = []
293  self._datasets_by_state = {}
294  self._restraints_by_state = {}
295 
296  def get_all_group(self, state):
297  """Get a DatasetGroup encompassing all datasets so far in this state"""
298  # Note that if a restraint dataset is replaced after this is called,
299  # the group will still contain the old dataset - mark dataset as read
300  # only?
301  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
302  + [r.dataset for r in self._restraints_by_state.get(state, [])
303  if r.dataset])
304  return g
305 
306  def add(self, state, dataset):
307  """Add a new dataset."""
308  self._datasets.append(dataset)
309  if state not in self._datasets_by_state:
310  self._datasets_by_state[state] = []
311  self._datasets_by_state[state].append(dataset)
312  # Make sure that the dataset ends up in the mmCIF file even if nothing
313  # refers to it
314  self.system.orphan_datasets.append(dataset)
315  return dataset
316 
317  def add_restraint(self, state, restraint):
318  """Add the dataset for a restraint"""
319  if state not in self._restraints_by_state:
320  self._restraints_by_state[state] = []
321  self._restraints_by_state[state].append(restraint)
322 
323 
324 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
325  """Restrain to a set of cross links"""
326 
327  assembly = None
328  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
329  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
330  'EDC': ihm.cross_linkers.edc}
331 
332  def __init__(self, pmi_restraint):
333  self.pmi_restraint = pmi_restraint
334  label = self.pmi_restraint.label
335  # Map commonly-used subtypes to more standard labels
336  label = self._label_map.get(label, label)
337  self.label = label
338  super(_CrossLinkRestraint, self).__init__(
339  dataset=self.pmi_restraint.dataset,
340  linker=self._get_chem_descriptor(label))
341 
342  @classmethod
343  def _get_chem_descriptor(cls, label):
344  if label not in cls._descriptor_map:
345  # If label is not a standard linker type, make a new chemical
346  # descriptor containing just the name. We don't know the chemistry
347  # so cannot specify a SMILES or INCHI string for it at this point
348  d = ihm.ChemDescriptor(label)
349  cls._descriptor_map[label] = d
350  return cls._descriptor_map[label]
351 
352  def _set_psi_sigma(self, model):
353  old_values = []
354  # Do nothing if given a different state than the restraint applies to
355  if model.m != self.pmi_restraint.m:
356  return
357  for resolution in self.pmi_restraint.sigma_dictionary:
358  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
359  if model.stats and statname in model.stats:
360  sigma = float(model.stats[statname])
361  p = self.pmi_restraint.sigma_dictionary[resolution][0]
362  old_values.append((p, p.get_scale()))
363  p.set_scale(sigma)
364  for psiindex in self.pmi_restraint.psi_dictionary:
365  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
366  if model.stats and statname in model.stats:
367  psi = float(model.stats[statname])
368  p = self.pmi_restraint.psi_dictionary[psiindex][0]
369  old_values.append((p, p.get_scale()))
370  p.set_scale(psi)
371  # If the same particle is set multiple times, make sure we get the
372  # original value last
373  return list(reversed(old_values))
374 
375  def add_fits_from_model_statfile(self, model):
376  # Set psi/sigma for all particles
377  old_values = self._set_psi_sigma(model)
378  # If no stats were collected, we can't show the fit
379  if not old_values:
380  return
381  for xl in self.cross_links:
382  # Get current psi/sigma particle value for each XL
383  xl.fits[model] = ihm.restraint.CrossLinkFit(
384  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
385  # Restore original psi/sigma
386  for p, scale in old_values:
387  p.set_scale(scale)
388 
389  # Have our dataset point to that in the original PMI restraint
390  def __set_dataset(self, val):
391  self.pmi_restraint.dataset = val
392  dataset = property(lambda self: self.pmi_restraint.dataset,
393  __set_dataset)
394 
395 
396 def get_asym_mapper_for_state(simo, state, asym_map):
397  asym = asym_map.get(state, None)
398  if asym is None:
399  asym = _AsymMapper(simo, state.prot)
400  asym_map[state] = asym
401  return asym
402 
403 class _PMICrossLink(object):
404  # Query PMI particles to get psi and sigma values at time of use, not
405  # time of construction (since they may be set after creating the restraint)
406  psi = property(lambda self: self.psi_p.get_scale(),
407  lambda self, val: None)
408  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
409  lambda self, val: None)
410  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
411  lambda self, val: None)
412 
413 
414 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
415  pass
416 
417 
418 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
419  pass
420 
421 
422 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
423  def __init__(self, state, pmi_restraint, image_number, resolution,
424  pixel_size, image_resolution, projection_number,
425  micrographs_number):
426  self.pmi_restraint, self.image_number = pmi_restraint, image_number
427  super(_EM2DRestraint, self).__init__(
428  dataset=pmi_restraint.datasets[image_number],
429  assembly=state.modeled_assembly,
430  segment=False, number_raw_micrographs=micrographs_number,
431  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
432  image_resolution=image_resolution,
433  number_of_projections=projection_number)
434 
435  # Have our dataset point to that in the original PMI restraint
436  def __get_dataset(self):
437  return self.pmi_restraint.datasets[self.image_number]
438  def __set_dataset(self, val):
439  self.pmi_restraint.datasets[self.image_number] = val
440  dataset = property(__get_dataset, __set_dataset)
441 
442  def add_fits_from_model_statfile(self, model):
443  ccc = self._get_cross_correlation(model)
444  transform = self._get_transformation(model)
445  rot = transform.get_rotation()
446  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
447  self.fits[model] = ihm.restraint.EM2DRestraintFit(
448  cross_correlation_coefficient=ccc,
449  rot_matrix=rm,
450  tr_vector=transform.get_translation())
451 
452  def _get_transformation(self, model):
453  """Get the transformation that places the model on the image"""
454  stats = model.em2d_stats or model.stats
455  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
456  self.image_number + 1)
457  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
458  t = [float(stats[prefix + '_Translation%d' % i])
459  for i in range(3)]
460  # If the model coordinates are transformed, need to back transform
461  # them first
462  inv = model.transform.get_inverse()
464  IMP.algebra.Vector3D(*t)) * inv
465 
466  def _get_cross_correlation(self, model):
467  """Get the cross correlation coefficient between the model projection
468  and the image"""
469  stats = model.em2d_stats or model.stats
470  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
471  % (self.pmi_restraint.label,
472  self.image_number + 1)])
473 
474 
475 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
476 
477  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
478  self.pmi_restraint = pmi_restraint
479  super(_EM3DRestraint, self).__init__(
480  dataset=pmi_restraint.dataset,
481  assembly=self._get_assembly(densities, simo, state),
482  fitting_method='Gaussian mixture models',
483  number_of_gaussians=len(target_ps))
484 
485  # Have our dataset point to that in the original PMI restraint
486  def __set_dataset(self, val):
487  self.pmi_restraint.dataset = val
488  dataset = property(lambda self: self.pmi_restraint.dataset,
489  __set_dataset)
490 
491  def _get_assembly(self, densities, simo, state):
492  """Get the Assembly that this restraint acts on"""
493  cm = _ComponentMapper(state.prot)
494  components = {}
495  for d in densities:
496  components[cm[d]] = None # None == all residues in this component
497  a = simo._get_subassembly(components,
498  name="EM subassembly",
499  description="All components that fit the EM map")
500  return a
501 
502  def add_fits_from_model_statfile(self, model):
503  ccc = self._get_cross_correlation(model)
504  self.fits[model] = ihm.restraint.EM3DRestraintFit(
505  cross_correlation_coefficient=ccc)
506 
507  def _get_cross_correlation(self, model):
508  """Get the cross correlation coefficient between the model
509  and the map"""
510  if model.stats is not None:
511  return float(model.stats['GaussianEMRestraint_%s_CCC'
512  % self.pmi_restraint.label])
513 
514 
515 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
516 
517  def __init__(self, simo, state, pmi_restraint, geometric_object,
518  feature, distance, sigma):
519  self.pmi_restraint = pmi_restraint
520  super(_GeometricRestraint, self).__init__(
521  dataset=pmi_restraint.dataset,
522  geometric_object=geometric_object, feature=feature,
523  distance=distance, harmonic_force_constant = 1. / sigma,
524  restrain_all=True)
525 
526  # Have our dataset point to that in the original PMI restraint
527  def __set_dataset(self, val):
528  self.pmi_restraint.dataset = val
529  dataset = property(lambda self: self.pmi_restraint.dataset,
530  __set_dataset)
531 
532 
533 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
534  def __init__(self, state, rex):
535  if rex.monte_carlo_sample_objects is not None:
536  method = 'Replica exchange monte carlo'
537  else:
538  method = 'Replica exchange molecular dynamics'
539  super(_ReplicaExchangeProtocolStep, self).__init__(
540  assembly=state.modeled_assembly,
541  dataset_group=None, # filled in by add_step()
542  method=method, name='Sampling',
543  num_models_begin=None, # filled in by add_step()
544  num_models_end=rex.vars["number_of_frames"],
545  multi_scale=True, multi_state=False, ordered=False)
546 
547 
548 class _SimpleProtocolStep(ihm.protocol.Step):
549  def __init__(self, state, num_models_end, method):
550  super(_SimpleProtocolStep, self).__init__(
551  assembly=state.modeled_assembly,
552  dataset_group=None, # filled in by add_step()
553  method=method, name='Sampling',
554  num_models_begin=None, # filled in by add_step()
555  num_models_end=num_models_end,
556  multi_scale=True, multi_state=False, ordered=False)
557 
558 
559 class _Chain(object):
560  """Represent a single chain in a Model"""
561  def __init__(self, pmi_chain_id, asym_unit):
562  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
563  self.spheres = []
564  self.atoms = []
565 
566  def add(self, xyz, atom_type, residue_type, residue_index,
567  all_indexes, radius):
568  if atom_type is None:
569  self.spheres.append((xyz, residue_type, residue_index,
570  all_indexes, radius))
571  else:
572  self.atoms.append((xyz, atom_type, residue_type, residue_index,
573  all_indexes, radius))
574  orig_comp = property(lambda self: self.comp)
575 
576 class _TransformedChain(object):
577  """Represent a chain that is a transformed version of another"""
578  def __init__(self, orig_chain, asym_unit, transform):
579  self.orig_chain, self.asym_unit = orig_chain, asym_unit
580  self.transform = transform
581 
582  def __get_spheres(self):
583  for (xyz, residue_type, residue_index, all_indexes,
584  radius) in self.orig_chain.spheres:
585  yield (self.transform * xyz, residue_type, residue_index,
586  all_indexes, radius)
587  spheres = property(__get_spheres)
588 
589  def __get_atoms(self):
590  for (xyz, atom_type, residue_type, residue_index, all_indexes,
591  radius) in self.orig_chain.atoms:
592  yield (self.transform * xyz, atom_type, residue_type, residue_index,
593  all_indexes, radius)
594  atoms = property(__get_atoms)
595 
596  entity = property(lambda self: self.orig_chain.entity)
597  orig_comp = property(lambda self: self.orig_chain.comp)
598 
599 class _Excluder(object):
600  def __init__(self, component, simo):
601  self._seqranges = simo._exclude_coords.get(component, [])
602 
603  def is_excluded(self, indexes):
604  """Return True iff the given sequence range is excluded."""
605  for seqrange in self._seqranges:
606  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
607  return True
608 
609 
610 class _Model(ihm.model.Model):
611  def __init__(self, prot, simo, protocol, assembly, representation):
612  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
613  representation=representation)
614  self.simo = weakref.proxy(simo)
615  # Transformation from IMP coordinates into mmCIF coordinate system.
616  # Normally we pass through coordinates unchanged, but in some cases
617  # we may want to translate them (e.g. Nup84, where the deposited PDB
618  # files are all centered; we want the mmCIF files to match)
620  self.em2d_stats = None
621  self.stats = None
622  # True iff restraints act on this model
623  self._is_restrained = True
624  o = self.output = IMP.pmi1.output.Output(atomistic=True)
625  name = 'cif-output'
626  self.m = prot.get_model()
627  o.dictionary_pdbs[name] = prot
628  o._init_dictchain(name, prot, multichar_chain=True)
629  (particle_infos_for_pdb,
630  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
631  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
632  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
633  self.rmsf = {}
634 
635  def all_chains(self, simo):
636  """Yield all chains, including transformed ones"""
637  chain_for_comp = {}
638  for c in self.chains:
639  yield c
640  chain_for_comp[c.comp] = c
641  for tc in simo._transformed_components:
642  orig_chain = chain_for_comp.get(tc.original, None)
643  if orig_chain:
644  asym = simo.asym_units[tc.name]
645  c = _TransformedChain(orig_chain, asym, tc.transform)
646  c.comp = tc.name
647  yield c
648 
649  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
650  entity_for_chain = {}
651  comp_for_chain = {}
652  correct_asym = {}
653  for protname, chain_id in o.dictchain[name].items():
654  entity_for_chain[chain_id] = simo.entities[protname]
655  comp_for_chain[chain_id] = protname
656  # When doing multi-state modeling, the chain ID returned here
657  # (assigned sequentially) might not be correct (states may have
658  # gaps in the chain IDs). Map it to the correct asym unit.
659  correct_asym[chain_id] = simo.asym_units[protname]
660 
661  # Gather by chain ID (should be sorted by chain ID already)
662  self.chains = []
663  chain = None
664  excluder = None
665 
666  for (xyz, atom_type, residue_type, chain_id, residue_index,
667  all_indexes, radius) in particle_infos_for_pdb:
668  if chain is None or chain.pmi_chain_id != chain_id:
669  chain = _Chain(chain_id, correct_asym[chain_id])
670  chain.entity = entity_for_chain[chain_id]
671  chain.comp = comp_for_chain[chain_id]
672  self.chains.append(chain)
673  excluder = _Excluder(chain.comp, simo)
674  if not excluder.is_excluded(all_indexes if all_indexes
675  else [residue_index]):
676  chain.add(xyz, atom_type, residue_type, residue_index,
677  all_indexes, radius)
678 
679  def parse_rmsf_file(self, fname, component):
680  self.rmsf[component] = rmsf = {}
681  with open(fname) as fh:
682  for line in fh:
683  resnum, blocknum, val = line.split()
684  rmsf[int(resnum)] = (int(blocknum), float(val))
685 
686  def get_rmsf(self, component, indexes):
687  """Get the RMSF value for the given residue indexes."""
688  if not self.rmsf:
689  return None
690  rmsf = self.rmsf[component]
691  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
692  if len(blocknums) != 1:
693  raise ValueError("Residue indexes %s aren't all in the same block"
694  % str(indexes))
695  return rmsf[indexes[0]][1]
696 
697  def get_atoms(self):
698  for chain in self.all_chains(self.simo):
699  pmi_offset = chain.asym_unit.entity.pmi_offset
700  for atom in chain.atoms:
701  (xyz, atom_type, residue_type, residue_index,
702  all_indexes, radius) = atom
703  pt = self.transform * xyz
704  yield ihm.model.Atom(asym_unit=chain.asym_unit,
705  seq_id=residue_index - pmi_offset,
706  atom_id=atom_type.get_string(),
707  type_symbol=None, # todo: get element
708  x=pt[0], y=pt[1], z=pt[2])
709 
710  def get_spheres(self):
711  for chain in self.all_chains(self.simo):
712  pmi_offset = chain.asym_unit.entity.pmi_offset
713  for sphere in chain.spheres:
714  (xyz, residue_type, residue_index,
715  all_indexes, radius) = sphere
716  if all_indexes is None:
717  all_indexes = (residue_index,)
718  pt = self.transform * xyz
719  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
720  seq_id_range=(all_indexes[0] - pmi_offset,
721  all_indexes[-1] - pmi_offset),
722  x=pt[0], y=pt[1], z=pt[2], radius=radius,
723  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
724 
725 
726 class _AllProtocols(object):
727  def __init__(self, simo):
728  self.simo = weakref.proxy(simo)
729  # Lists of Protocols by state
730  self.protocols = OrderedDict()
731 
732  def add_protocol(self, state):
733  """Add a new Protocol"""
734  if state not in self.protocols:
735  self.protocols[state] = []
736  p = ihm.protocol.Protocol()
737  self.simo.system.orphan_protocols.append(p)
738  self.protocols[state].append(p)
739 
740  def add_step(self, step, state):
741  """Add a ProtocolStep to the last Protocol of the given State"""
742  if state not in self.protocols:
743  self.add_protocol(state)
744  protocol = self.get_last_protocol(state)
745  if len(protocol.steps) == 0:
746  step.num_models_begin = 0
747  else:
748  step.num_models_begin = protocol.steps[-1].num_models_end
749  protocol.steps.append(step)
750  step.id = len(protocol.steps)
751  # Assume that protocol uses all currently-defined datasets
752  step.dataset_group = self.simo.all_datasets.get_all_group(state)
753 
754  def add_postproc(self, step, state):
755  """Add a postprocessing step to the last protocol"""
756  protocol = self.get_last_protocol(state)
757  if len(protocol.analyses) == 0:
758  protocol.analyses.append(ihm.analysis.Analysis())
759  protocol.analyses[-1].steps.append(step)
760 
761  def get_last_protocol(self, state):
762  """Return the most recently-added _Protocol"""
763  return self.protocols[state][-1]
764 
765 
766 class _AllStartingModels(object):
767  def __init__(self, simo):
768  self.simo = simo
769  # dict of starting models (entire PDB files), collected from fragments,
770  # ordered by component name and state
771  self.models = OrderedDict()
772  self.output = IMP.pmi1.output.Output()
773 
774  def add_pdb_fragment(self, fragment):
775  """Add a starting model PDB fragment."""
776  comp = fragment.component
777  state = fragment.state
778  if comp not in self.models:
779  self.models[comp] = OrderedDict()
780  if state not in self.models[comp]:
781  self.models[comp][state] = []
782  models = self.models[comp][state]
783  if len(models) == 0 \
784  or models[-1].fragments[0].pdbname != fragment.pdbname:
785  model = self._add_model(fragment)
786  models.append(model)
787  else:
788  # Break circular ref between fragment and model
789  models[-1].fragments.append(weakref.proxy(fragment))
790  # Update residue range to cover all fragments
791  pmi_offset = models[-1].asym_unit.entity.pmi_offset
792  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
793  models[-1].asym_unit.seq_id_range[0])
794  sid_end = max(fragment.end + fragment.offset - pmi_offset,
795  models[-1].asym_unit.seq_id_range[1])
796  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
797  fragment.starting_model = models[-1]
798 
799  def _add_model(self, f):
800  parser = ihm.metadata.PDBParser()
801  r = parser.parse_file(f.pdbname)
802 
803  self.simo._add_dataset(r['dataset'])
804  # We only want the templates that model the starting model chain
805  templates = r['templates'].get(f.chain, [])
806  for t in templates:
807  if t.alignment_file:
808  self.simo.system.locations.append(t.alignment_file)
809  if t.dataset:
810  self.simo._add_dataset(t.dataset)
811  pmi_offset = f.asym_unit.entity.pmi_offset
812  m = _StartingModel(
813  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
814  dataset=r['dataset'], asym_id=f.chain,
815  templates=templates, offset=f.offset + pmi_offset,
816  metadata=r['metadata'],
817  software=r['software'][0] if r['software'] else None,
818  script_file=r['script'])
819  m.fragments = [weakref.proxy(f)]
820  return m
821 
822 
823 class _StartingModel(ihm.startmodel.StartingModel):
824  def get_seq_dif(self):
825  return self._seq_dif
826 
827  def get_atoms(self):
828  pmi_offset = self.asym_unit.entity.pmi_offset
829  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
830  self.asym_unit)
831  for f in self.fragments:
832  sel = IMP.atom.Selection(f.starting_hier,
833  residue_indexes=list(range(f.start - f.offset,
834  f.end - f.offset + 1)))
835  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
836  f.offset - pmi_offset):
837  yield a
838  self._seq_dif = mh._seq_dif
839 
840 
841 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
842  """Post processing using AnalysisReplicaExchange0 macro"""
843 
844  def __init__(self, rex, num_models_begin):
845  self.rex = rex
846  num_models_end = 0
847  for fname in self.get_all_stat_files():
848  with open(fname) as fh:
849  num_models_end += len(fh.readlines())
850  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
851  feature='RMSD', num_models_begin=num_models_begin,
852  num_models_end=num_models_end)
853 
854  def get_stat_file(self, cluster_num):
855  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
856  'stat.out')
857 
858  def get_all_stat_files(self):
859  for i in range(self.rex._number_of_clusters):
860  yield self.get_stat_file(i)
861 
862 
863 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
864  """Ensemble generated using AnalysisReplicaExchange0 macro"""
865 
866  num_models_deposited = None # Override base class property
867 
868  def __init__(self, pp, cluster_num, model_group, num_deposit):
869  with open(pp.get_stat_file(cluster_num)) as fh:
870  num_models = len(fh.readlines())
871  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
872  num_models=num_models,
873  model_group=model_group, post_process=pp,
874  clustering_feature=pp.feature,
875  name=model_group.name)
876  self.cluster_num = cluster_num
877  self.num_models_deposited = num_deposit
878 
879  def get_rmsf_file(self, component):
880  return os.path.join(self.post_process.rex._outputdir,
881  'cluster.%d' % self.cluster_num,
882  'rmsf.%s.dat' % component)
883 
884  def load_rmsf(self, model, component):
885  fname = self.get_rmsf_file(component)
886  if os.path.exists(fname):
887  model.parse_rmsf_file(fname, component)
888 
889  def get_localization_density_file(self, fname):
890  return os.path.join(self.post_process.rex._outputdir,
891  'cluster.%d' % self.cluster_num,
892  '%s.mrc' % fname)
893 
894  def load_localization_density(self, state, fname, select_tuple, asym_units):
895  fullpath = self.get_localization_density_file(fname)
896  if os.path.exists(fullpath):
897  details = "Localization density for %s %s" \
898  % (fname, self.model_group.name)
899  local_file = ihm.location.OutputFileLocation(fullpath,
900  details=details)
901  for s in select_tuple:
902  if isinstance(s, tuple) and len(s) == 3:
903  asym = asym_units[s[2]].pmi_range(s[0], s[1])
904  else:
905  asym = asym_units[s]
906  den = ihm.model.LocalizationDensity(file=local_file,
907  asym_unit=asym)
908  self.densities.append(den)
909 
910  def load_all_models(self, simo, state):
911  stat_fname = self.post_process.get_stat_file(self.cluster_num)
912  model_num = 0
913  with open(stat_fname) as fh:
914  stats = ast.literal_eval(fh.readline())
915  # Correct path
916  rmf_file = os.path.join(os.path.dirname(stat_fname),
917  "%d.rmf3" % model_num)
918  for c in state.all_modeled_components:
919  # todo: this only works with PMI 1
920  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
921  force_rigid_update=True)
922  # todo: fill in other data from stat file, e.g. crosslink phi/psi
923  yield stats
924  model_num += 1
925  if model_num >= self.num_models_deposited:
926  return
927 
928  # todo: also support dRMS precision
929  def _get_precision(self):
930  precfile = os.path.join(self.post_process.rex._outputdir,
931  "precision.%d.%d.out" % (self.cluster_num,
932  self.cluster_num))
933  if not os.path.exists(precfile):
934  return ihm.unknown
935  # Fail if the precision.x.x.out file doesn't match the cluster
936  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
937  % self.cluster_num)
938  with open(precfile) as fh:
939  for line in fh:
940  m = r.match(line)
941  if m:
942  return float(m.group(1))
943 
944  precision = property(lambda self: self._get_precision(),
945  lambda self, val: None)
946 
947 class _SimpleEnsemble(ihm.model.Ensemble):
948  """Simple manually-created ensemble"""
949 
950  num_models_deposited = None # Override base class property
951 
952  def __init__(self, pp, model_group, num_models, drmsd,
953  num_models_deposited, ensemble_file):
954  super(_SimpleEnsemble, self).__init__(
955  model_group=model_group, post_process=pp, num_models=num_models,
956  file=ensemble_file, precision=drmsd, name=model_group.name,
957  clustering_feature='dRMSD')
958  self.num_models_deposited = num_models_deposited
959 
960  def load_localization_density(self, state, component, asym, local_file):
961  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
962  self.densities.append(den)
963 
964 
965 class _EntityMapper(dict):
966  """Handle mapping from IMP components to CIF entities.
967  Multiple components may map to the same entity if they share sequence."""
968  def __init__(self, system):
969  super(_EntityMapper, self).__init__()
970  self._sequence_dict = {}
971  self._entities = []
972  self.system = system
973 
974  def add(self, component_name, sequence, offset):
975  def entity_seq(sequence):
976  # Map X to UNK
977  if 'X' in sequence:
978  return ['UNK' if s == 'X' else s for s in sequence]
979  else:
980  return sequence
981  if sequence not in self._sequence_dict:
982  # Use the name of the first component, stripped of any copy number,
983  # as the description of the entity
984  d = component_name.split("@")[0].split(".")[0]
985  entity = Entity(entity_seq(sequence), description=d,
986  pmi_offset=offset)
987  self.system.entities.append(entity)
988  self._sequence_dict[sequence] = entity
989  self[component_name] = self._sequence_dict[sequence]
990 
991 
992 class _TransformedComponent(object):
993  def __init__(self, name, original, transform):
994  self.name, self.original, self.transform = name, original, transform
995 
996 
997 class _SimpleRef(object):
998  """Class with similar interface to weakref.ref, but keeps a strong ref"""
999  def __init__(self, ref):
1000  self.ref = ref
1001  def __call__(self):
1002  return self.ref
1003 
1004 
1005 class _State(ihm.model.State):
1006  """Representation of a single state in the system."""
1007 
1008  def __init__(self, pmi_object, po):
1009  # Point to the PMI object for this state. Use a weak reference
1010  # since the state object typically points to us too, so we need
1011  # to break the reference cycle. In PMI1 this will be a
1012  # Representation object; in PMI2 it is the PMI2 State object itself.
1013  self._pmi_object = weakref.proxy(pmi_object)
1014  if hasattr(pmi_object, 'state'):
1015  # Need a strong ref to pmi_object.state to prevent it from being
1016  # cleaned up when doing PMI1 multistate modeling
1017  self._pmi_state = _SimpleRef(pmi_object.state)
1018  else:
1019  self._pmi_state = weakref.ref(pmi_object)
1020  # Preserve PMI state name
1021  old_name = self.name
1022  super(_State, self).__init__(experiment_type='Fraction of bulk')
1023  self.name = old_name
1024 
1025  # The assembly of all components modeled by IMP in this state.
1026  # This may be smaller than the complete assembly.
1027  self.modeled_assembly = ihm.Assembly(
1028  name="Modeled assembly",
1029  description=self.get_postfixed_name(
1030  "All components modeled by IMP"))
1031  po.system.orphan_assemblies.append(self.modeled_assembly)
1032 
1033  self.all_modeled_components = []
1034 
1035  def __hash__(self):
1036  return hash(self._pmi_state())
1037  def __eq__(self, other):
1038  return self._pmi_state() == other._pmi_state()
1039 
1040  def add_model_group(self, group):
1041  self.append(group)
1042  return group
1043 
1044  def get_prefixed_name(self, name):
1045  """Prefix the given name with the state name, if available."""
1046  if self.short_name:
1047  return self.short_name + ' ' + name
1048  else:
1049  # Can't use capitalize() since that un-capitalizes everything
1050  # but the first letter
1051  return name[0].upper() + name[1:] if name else ''
1052 
1053  def get_postfixed_name(self, name):
1054  """Postfix the given name with the state name, if available."""
1055  if self.short_name:
1056  return "%s in state %s" % (name, self.short_name)
1057  else:
1058  return name
1059 
1060  short_name = property(lambda self: self._pmi_state().short_name)
1061  long_name = property(lambda self: self._pmi_state().long_name)
1062  def __get_name(self):
1063  return self._pmi_state().long_name
1064  def __set_name(self, val):
1065  self._pmi_state().long_name = val
1066  name = property(__get_name, __set_name)
1067 
1068 
1069 class Entity(ihm.Entity):
1070  """A single entity in the system.
1071  This functions identically to the base ihm.Entity class, but it
1072  allows identifying residues by either the IHM numbering scheme
1073  (seq_id, which is always contiguous starting at 1) or by the PMI
1074  scheme (which is similar, but may not start at 1 if the sequence in
1075  the FASTA file has one or more N-terminal gaps"""
1076  def __init__(self, sequence, pmi_offset, *args, **keys):
1077  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1078  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1079  self.pmi_offset = pmi_offset
1080  super(Entity, self).__init__(sequence, *args, **keys)
1081 
1082  def pmi_residue(self, res_id):
1083  """Return a single IHM residue indexed using PMI numbering"""
1084  return self.residue(res_id - self.pmi_offset)
1085 
1086  def pmi_range(self, res_id_begin, res_id_end):
1087  """Return a range of IHM residues indexed using PMI numbering"""
1088  off = self.pmi_offset
1089  return self(res_id_begin - off, res_id_end - off)
1090 
1091 
1092 class AsymUnit(ihm.AsymUnit):
1093  """A single asymmetric unit in the system.
1094  This functions identically to the base ihm.AsymUnit class, but it
1095  allows identifying residues by either the IHM numbering scheme
1096  (seq_id, which is always contiguous starting at 1) or by the PMI
1097  scheme (which is similar, but may not start at 1 if the sequence in
1098  the FASTA file has one or more N-terminal gaps"""
1099 
1100  def __init__(self, entity, *args, **keys):
1101  super(AsymUnit, self).__init__(
1102  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1103 
1104  def pmi_residue(self, res_id):
1105  """Return a single IHM residue indexed using PMI numbering"""
1106  return self.residue(res_id - self.entity.pmi_offset)
1107 
1108  def pmi_range(self, res_id_begin, res_id_end):
1109  """Return a range of IHM residues indexed using PMI numbering"""
1110  off = self.entity.pmi_offset
1111  return self(res_id_begin - off, res_id_end - off)
1112 
1113 
1115  """Class to encode a modeling protocol as mmCIF.
1116 
1117  IMP has basic support for writing out files in mmCIF format, for
1118  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1119  After creating an instance of this class, attach it to an
1120  IMP.pmi1.representation.Representation object. After this, any
1121  generated models and metadata are automatically collected and
1122  output as mmCIF.
1123  """
1124  def __init__(self, fh):
1125  # Ultimately, collect data in an ihm.System object
1126  self.system = ihm.System()
1127  self._state_group = ihm.model.StateGroup()
1128  self.system.state_groups.append(self._state_group)
1129 
1130  self.fh = fh
1131  self._state_ensemble_offset = 0
1132  self._each_metadata = [] # list of metadata for each representation
1133  self._file_datasets = []
1134  self._main_script = os.path.abspath(sys.argv[0])
1135 
1136  # Point to the main modeling script
1137  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1138  details="The main integrative modeling script")
1139  self.system.locations.append(loc)
1140 
1141  self._states = {}
1142  self.__asym_states = {}
1143  self._working_directory = os.getcwd()
1144  self.default_representation = self.create_representation(
1145  "Default representation")
1146  self.entities = _EntityMapper(self.system)
1147  # Mapping from component names to ihm.AsymUnit
1148  self.asym_units = {}
1149  self._all_components = {}
1150  self.all_modeled_components = []
1151  self._transformed_components = []
1152  self.sequence_dict = {}
1153 
1154  # Common geometric objects used in PMI systems
1155  self._xy_plane = ihm.geometry.XYPlane()
1156  self._xz_plane = ihm.geometry.XZPlane()
1157  self._z_axis = ihm.geometry.ZAxis()
1158  self._center_origin = ihm.geometry.Center(0,0,0)
1159  self._identity_transform = ihm.geometry.Transformation.identity()
1160 
1161  # Coordinates to exclude
1162  self._exclude_coords = {}
1163 
1164  self.all_representations = _AllModelRepresentations(self)
1165  self.all_protocols = _AllProtocols(self)
1166  self.all_datasets = _AllDatasets(self.system)
1167  self.all_starting_models = _AllStartingModels(self)
1168 
1169  self.all_software = _AllSoftware(self.system)
1170 
1171  def create_representation(self, name):
1172  """Create a new Representation and return it. This can be
1173  passed to add_model(), add_bead_element() or add_pdb_element()."""
1174  r = ihm.representation.Representation()
1175  self.system.orphan_representations.append(r)
1176  return r
1177 
1178  def exclude_coordinates(self, component, seqrange):
1179  """Don't record coordinates for the given domain.
1180  Coordinates for the given domain (specified by a component name
1181  and a 2-element tuple giving the start and end residue numbers)
1182  will be excluded from the mmCIF file. This can be used to exclude
1183  parts of the structure that weren't well resolved in modeling.
1184  Any bead or residue that lies wholly within this range will be
1185  excluded. Multiple ranges for a given component can be excluded
1186  by calling this method multiple times."""
1187  if component not in self._exclude_coords:
1188  self._exclude_coords[component] = []
1189  self._exclude_coords[component].append(seqrange)
1190 
1191  def _is_excluded(self, component, start, end):
1192  """Return True iff this chunk of sequence should be excluded"""
1193  for seqrange in self._exclude_coords.get(component, ()):
1194  if start >= seqrange[0] and end <= seqrange[1]:
1195  return True
1196 
1197  def _add_state(self, state):
1198  """Create a new state and return a pointer to it."""
1199  self._state_ensemble_offset = len(self.system.ensembles)
1200  s = _State(state, self)
1201  self._state_group.append(s)
1202  self._last_state = s
1203  return s
1204 
1205  def get_file_dataset(self, fname):
1206  for d in self._file_datasets:
1207  fd = d.get(os.path.abspath(fname), None)
1208  if fd:
1209  return fd
1210 
1211  def _get_chain_for_component(self, name, output):
1212  """Get the chain ID for a component, if any."""
1213  # todo: handle multiple copies
1214  if name in self.asym_units:
1215  return self.asym_units[name]._id
1216  else:
1217  # A non-modeled component doesn't have a chain ID
1218  return None
1219 
1220  def _get_assembly_comps(self, assembly):
1221  """Get the names of the components in the given assembly"""
1222  # component name = asym_unit.details
1223  comps = {}
1224  for ca in assembly:
1225  comps[ca.details] = None
1226  return comps
1227 
1228  def create_transformed_component(self, state, name, original, transform):
1229  """Make a new component that's a transformed copy of another.
1230  All representation for the existing component is copied to the
1231  new one."""
1232  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1233  if name in assembly_comps:
1234  raise ValueError("Component %s already exists" % name)
1235  elif original not in assembly_comps:
1236  raise ValueError("Original component %s does not exist" % original)
1237  self.create_component(state, name, True)
1238  self.add_component_sequence(state, name, self.sequence_dict[original])
1239  self._transformed_components.append(_TransformedComponent(
1240  name, original, transform))
1241  self.all_representations.copy_component(state, name, original,
1242  self.asym_units[name])
1243 
1244  def create_component(self, state, name, modeled):
1245  new_comp = name not in self._all_components
1246  self._all_components[name] = None
1247  if modeled:
1248  state.all_modeled_components.append(name)
1249  if new_comp:
1250  self.asym_units[name] = None # assign asym once we get sequence
1251  self.all_modeled_components.append(name)
1252 
1253  def add_component_sequence(self, state, name, seq):
1254  def get_offset(seq):
1255  # Count length of gaps at start of sequence
1256  for i in range(len(seq)):
1257  if seq[i] != '-':
1258  return seq[i:], i
1259  seq, offset = get_offset(seq)
1260  if name in self.sequence_dict:
1261  if self.sequence_dict[name] != seq:
1262  raise ValueError("Sequence mismatch for component %s" % name)
1263  else:
1264  self.sequence_dict[name] = seq
1265  self.entities.add(name, seq, offset)
1266  if name in self.asym_units:
1267  if self.asym_units[name] is None:
1268  # Set up a new asymmetric unit for this component
1269  entity = self.entities[name]
1270  asym = AsymUnit(entity, details=name)
1271  self.system.asym_units.append(asym)
1272  self.asym_units[name] = asym
1273  state.modeled_assembly.append(self.asym_units[name])
1274 
1275  def _add_restraint_model_fits(self):
1276  """Add fits to restraints for all known models"""
1277  for group, m in self.system._all_models():
1278  if m._is_restrained:
1279  for r in self.system.restraints:
1280  if hasattr(r, 'add_fits_from_model_statfile'):
1281  r.add_fits_from_model_statfile(m)
1282 
1283  def _add_em2d_raw_micrographs(self):
1284  """If the deprecated metadata.EMMicrographsDataset class was used,
1285  transfer its data to the EM2D restraint."""
1286  for r in self.system.restraints:
1287  if isinstance(r, _EM2DRestraint):
1288  for d in r.dataset.parents:
1289  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1290  r.number_raw_micrographs = d.number
1291  if isinstance(d.location,
1292  IMP.pmi1.metadata.FileLocation):
1293  d.location.content_type = \
1294  ihm.location.InputFileLocation.content_type
1295 
1296  def flush(self):
1297  self.finalize()
1298  ihm.dumper.write(self.fh, [self.system])
1299 
1300  def finalize(self):
1301  """Do any final processing on the class hierarchy.
1302  After calling this method, the `system` member (an instance
1303  of `ihm.System`) completely reproduces the PMI modeling, and
1304  can be written out to an mmCIF file with `ihm.dumper.write`,
1305  and/or modified using the ihm API."""
1306  self._add_restraint_model_fits()
1307 
1308  self._add_em2d_raw_micrographs()
1309 
1310  # Add metadata to ihm.System
1311  self.system.software.extend(m for m in self._metadata
1312  if isinstance(m, ihm.Software))
1313  self.system.citations.extend(m for m in self._metadata
1314  if isinstance(m, ihm.Citation))
1315  self.system.locations.extend(m for m in self._metadata
1316  if isinstance(m, ihm.location.FileLocation))
1317 
1318  # Point all locations to repos, if applicable
1319  all_repos = [m for m in self._metadata
1320  if isinstance(m, ihm.location.Repository)]
1321  self.system.update_locations_in_repositories(all_repos)
1322 
1323  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1324  chain, hier, representation=None):
1325  if self._is_excluded(name, start, end):
1326  return
1327  if representation is None:
1328  representation = self.default_representation
1329  asym = self.asym_units[name]
1330  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1331  hier, asym)
1332  self.all_representations.add_fragment(state, representation, p)
1333  self.all_starting_models.add_pdb_fragment(p)
1334 
1335  def add_bead_element(self, state, name, start, end, num, hier,
1336  representation=None):
1337  if self._is_excluded(name, start, end):
1338  return
1339  if representation is None:
1340  representation = self.default_representation
1341  asym = self.asym_units[name]
1342  pmi_offset = asym.entity.pmi_offset
1343  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1344  num, hier, asym)
1345  self.all_representations.add_fragment(state, representation, b)
1346 
1347  def get_cross_link_group(self, pmi_restraint):
1348  r = _CrossLinkRestraint(pmi_restraint)
1349  self.system.restraints.append(r)
1350  self._add_restraint_dataset(r) # so that all-dataset group works
1351  return r
1352 
1353  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1354  if c1 not in self._all_components or c2 not in self._all_components:
1355  # Crosslink refers to a component we didn't model
1356  # As a quick hack, just ignore it.
1357  # todo: need to add an entity for this anyway (so will need the
1358  # sequence, and add to struct_assembly)
1359  return None
1360  e1 = self.entities[c1]
1361  e2 = self.entities[c2]
1362  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1363  residue2=e2.pmi_residue(r2))
1364  rsr.experimental_cross_links.append([xl])
1365  return xl
1366 
1367  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1368  psi_p, rsr):
1369  # Map p1/p2 to asym units
1370  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1371  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1372 
1373  if _get_by_residue(p1) and _get_by_residue(p2):
1374  cls = _ResidueCrossLink
1375  else:
1376  cls = _FeatureCrossLink
1377  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1378  restrain_all=True)
1379  # Needed to look up fits later
1380  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1381  rsr.cross_links.append(xl)
1382 
1383  def add_replica_exchange(self, state, rex):
1384  # todo: allow for metadata to say how many replicas were used in the
1385  # actual experiment, and how many independent runs were carried out
1386  # (use these as multipliers to get the correct total number of
1387  # output models)
1388  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1389  state)
1390 
1391  def _add_simple_dynamics(self, num_models_end, method):
1392  # Always assumed that we're dealing with the last state
1393  state = self._last_state
1394  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1395  method), state)
1396 
1397  def _add_protocol(self):
1398  # Always assumed that we're dealing with the last state
1399  state = self._last_state
1400  self.all_protocols.add_protocol(state)
1401 
1402  def _add_dataset(self, dataset):
1403  return self.all_datasets.add(self._last_state, dataset)
1404 
1405  def _add_restraint_dataset(self, restraint):
1406  return self.all_datasets.add_restraint(self._last_state, restraint)
1407 
1408  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1409  # Always assumed that we're dealing with the last state
1410  state = self._last_state
1411  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1412  self.all_protocols.add_postproc(pp, state)
1413  return pp
1414 
1415  def _add_no_postprocessing(self, num_models):
1416  # Always assumed that we're dealing with the last state
1417  state = self._last_state
1418  pp = ihm.analysis.EmptyStep()
1419  pp.num_models_begin = pp.num_models_end = num_models
1420  self.all_protocols.add_postproc(pp, state)
1421  return pp
1422 
1423  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1424  num_models_deposited, localization_densities,
1425  ensemble_file):
1426  """Add an ensemble generated by ad hoc methods (not using PMI).
1427  This is currently only used by the Nup84 system."""
1428  # Always assumed that we're dealing with the last state
1429  state = self._last_state
1430  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1431  state.add_model_group(group)
1432  if ensemble_file:
1433  # Deprecated Location class does not state input vs output
1434  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1435  ensemble_file.content_type = \
1436  ihm.location.OutputFileLocation.content_type
1437  self.system.locations.append(ensemble_file)
1438  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1439  ensemble_file)
1440  self.system.ensembles.append(e)
1441  for c in state.all_modeled_components:
1442  den = localization_densities.get(c, None)
1443  if den:
1444  # Deprecated Location class does not state input vs output
1445  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1446  den.content_type = \
1447  ihm.location.OutputFileLocation.content_type
1448  e.load_localization_density(state, c, self.asym_units[c], den)
1449  return e
1450 
1451  def set_ensemble_file(self, i, location):
1452  """Point a previously-created ensemble to an 'all-models' file.
1453  This could be a trajectory such as DCD, an RMF, or a multimodel
1454  PDB file."""
1455  # Deprecated Location class does not state input vs output
1456  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1457  location.content_type = ihm.location.OutputFileLocation.content_type
1458  self.system.locations.append(location)
1459  # Ensure that we point to an ensemble related to the current state
1460  ind = i + self._state_ensemble_offset
1461  self.system.ensembles[ind].file = location
1462 
1463  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1464  # todo: add prefilter as an additional postprocess step (complication:
1465  # we don't know how many models it filtered)
1466  # todo: postpone rmsf/density localization extraction until after
1467  # relevant methods are called (currently it is done from the
1468  # constructor)
1469  protocol = self.all_protocols.get_last_protocol(state)
1470  num_models = protocol.steps[-1].num_models_end
1471  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1472  self.all_protocols.add_postproc(pp, state)
1473  for i in range(rex._number_of_clusters):
1474  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1475  'cluster %d' % (i + 1)))
1476  state.add_model_group(group)
1477  # todo: make # of models to deposit configurable somewhere
1478  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1479  self.system.ensembles.append(e)
1480  # Add localization density info if available
1481  for fname, stuple in sorted(density_custom_ranges.items()):
1482  e.load_localization_density(state, fname, stuple,
1483  self.asym_units)
1484  for stats in e.load_all_models(self, state):
1485  m = self.add_model(group)
1486  # Since we currently only deposit 1 model, it is the
1487  # best scoring one
1488  m.name = 'Best scoring model'
1489  m.stats = stats
1490  # Add RMSF info if available
1491  for c in state.all_modeled_components:
1492  e.load_rmsf(m, c)
1493 
1494  def _get_subassembly(self, comps, name, description):
1495  """Get an Assembly consisting of the given components.
1496  `compdict` is a dictionary of the components to add, where keys
1497  are the component names and values are the sequence ranges (or
1498  None to use all residues in the component)."""
1499  asyms = []
1500  for comp, seqrng in comps.items():
1501  a = self.asym_units[comp]
1502  asyms.append(a if seqrng is None else a(*seqrng))
1503 
1504  a = ihm.Assembly(asyms, name=name, description=description)
1505  return a
1506 
1507  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1508  details):
1509  """Add a basic FoXS fit. This is largely intended for use from the
1510  NPC application."""
1511  assembly = self._get_subassembly({comp:seqrange},
1512  name="SAXS subassembly",
1513  description="All components that fit SAXS data")
1514  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1515  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1516  multi_state=False, radius_of_gyration=rg, details=details)
1517  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1518  self.system.restraints.append(r)
1519  self._add_restraint_dataset(r) # so that all-dataset group works
1520 
1521  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1522  image_resolution, projection_number,
1523  micrographs_number):
1524  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1525  image_resolution, projection_number,
1526  micrographs_number)
1527  self.system.restraints.append(r)
1528  self._add_restraint_dataset(r) # so that all-dataset group works
1529 
1530  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1531  # todo: need to set allow_duplicates on this dataset?
1532  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1533  self.system.restraints.append(r)
1534  self._add_restraint_dataset(r) # so that all-dataset group works
1535 
1536  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1537  sigma, pmi_restraint):
1538  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1539  sigma, pmi_restraint, self._xy_plane)
1540 
1541  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1542  sigma, pmi_restraint):
1543  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1544  sigma, pmi_restraint, self._xz_plane)
1545 
1546  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1547  sigma, pmi_restraint):
1548  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1549  sigma, pmi_restraint, self._z_axis)
1550 
1551  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1552  sigma, pmi_restraint, geom):
1553  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1554  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1555  asym.get_feature(ps),
1556  ihm.restraint.LowerUpperBoundDistanceRestraint(
1557  lower_bound, upper_bound),
1558  sigma)
1559  self.system.restraints.append(r)
1560  self._add_restraint_dataset(r) # so that all-dataset group works
1561 
1562  def _get_membrane(self, tor_R, tor_r, tor_th):
1563  """Get an object representing a half-torus membrane"""
1564  if not hasattr(self, '_seen_membranes'):
1565  self._seen_membranes = {}
1566  # If a membrane already exists with all dimensions within 0.01 of
1567  # this one, reuse it
1568  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1569  if membrane_id not in self._seen_membranes:
1570  m = ihm.geometry.HalfTorus(center=self._center_origin,
1571  transformation=self._identity_transform,
1572  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1573  inner=True, name='Membrane')
1574  self._seen_membranes[membrane_id] = m
1575  return self._seen_membranes[membrane_id]
1576 
1577  def add_membrane_surface_location_restraint(
1578  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1579  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1580  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1581 
1582  def add_membrane_exclusion_restraint(
1583  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1584  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1585  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1586 
1587  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1588  sigma, pmi_restraint, rsr):
1589  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1590  r = _GeometricRestraint(self, state, pmi_restraint,
1591  self._get_membrane(tor_R, tor_r, tor_th),
1592  asym.get_feature(ps), rsr, sigma)
1593  self.system.restraints.append(r)
1594  self._add_restraint_dataset(r) # so that all-dataset group works
1595 
1596  def add_model(self, group, assembly=None, representation=None):
1597  state = self._last_state
1598  if representation is None:
1599  representation = self.default_representation
1600  protocol = self.all_protocols.get_last_protocol(state)
1601  m = _Model(state.prot, self, protocol,
1602  assembly if assembly else state.modeled_assembly,
1603  representation)
1604  group.append(m)
1605  return m
1606 
1607  def _update_locations(self, filelocs):
1608  """Update FileLocation to point to a parent repository, if any"""
1609  all_repos = [m for m in self._metadata
1610  if isinstance(m, ihm.location.Repository)]
1611  for fileloc in filelocs:
1612  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1613 
1614  _metadata = property(lambda self:
1615  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1171
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1300
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1120
A single asymmetric unit in the system.
Definition: /mmcif.py:1096
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1181
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1082
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:731
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1451
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1086
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1104
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1228
3D rotation class.
Definition: Rotation3D.h:51
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1069
VectorD< 3 > Vector3D
Definition: VectorD.h:421
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1108
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...