IMP logo
IMP Reference Guide  2.17.0
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 import ihm.cross_linkers
44 
45 def _assign_id(obj, seen_objs, obj_by_id):
46  """Assign a unique ID to obj, and track all ids in obj_by_id."""
47  if obj not in seen_objs:
48  if not hasattr(obj, 'id'):
49  obj_by_id.append(obj)
50  obj.id = len(obj_by_id)
51  seen_objs[obj] = obj.id
52  else:
53  obj.id = seen_objs[obj]
54 
55 
56 def _get_by_residue(p):
57  """Determine whether the given particle represents a specific residue
58  or a more coarse-grained object."""
60 
61 
62 class _ComponentMapper(object):
63  """Map a Particle to a component name"""
64  def __init__(self, prot):
65  self.o = IMP.pmi1.output.Output()
66  self.prot = prot
67  self.name = 'cif-output'
68  self.o.dictionary_pdbs[self.name] = self.prot
69  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
70 
71  def __getitem__(self, p):
72  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
73  return protname
74 
75 
76 class _AsymMapper(object):
77  """Map a Particle to an asym_unit"""
78  def __init__(self, simo, prot):
79  self.simo = simo
80  self._cm = _ComponentMapper(prot)
81 
82  def __getitem__(self, p):
83  protname = self._cm[p]
84  return self.simo.asym_units[protname]
85 
86  def get_feature(self, ps):
87  """Get an ihm.restraint.Feature that covers the given particles"""
88  # todo: handle things other than residues
89  rngs = []
90  for p in ps:
91  asym = self[p]
92  # todo: handle overlapping or adjoining ranges
94  rind = IMP.atom.Residue(p).get_index()
95  rngs.append(asym(rind, rind))
97  # PMI Fragments always contain contiguous residues
98  rinds = IMP.atom.Fragment(p).get_residue_indexes()
99  rngs.append(asym(rinds[0], rinds[-1]))
100  else:
101  raise ValueError("Unsupported particle type %s" % str(p))
102  return ihm.restraint.ResidueFeature(rngs)
103 
104 
105 class _AllSoftware(object):
106  def __init__(self, system):
107  self.system = system
108  self.modeller_used = self.phyre2_used = False
109  imp = ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org')
115  pmi = ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi1.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')
121  self.system.software.extend([imp, pmi])
122  # Only recent versions of python-ihm support adding citations for
123  # software
124  if hasattr(imp, 'citation'):
125  if sys.version_info[0] > 2:
126  # Don't include UTF8 characters in source; it confuses Python 2
127  javi = 'Vel\u00e1zquez-Muriel J'
128  else:
129  javi = 'Velazquez-Muriel J'
130  imp.citation = ihm.Citation(
131  pmid='22272186',
132  title='Putting the pieces together: integrative modeling '
133  'platform software for structure determination of '
134  'macromolecular assemblies',
135  journal='PLoS Biol', volume=10, page_range='e1001244',
136  year=2012,
137  authors=['Russel D', 'Lasker K', 'Webb B', javi, 'Tjioe E',
138  'Schneidman-Duhovny D', 'Peterson B', 'Sali A'],
139  doi='10.1371/journal.pbio.1001244')
140  pmi.citation = ihm.Citation(
141  pmid='31396911',
142  title='Modeling Biological Complexes Using Integrative '
143  'Modeling Platform.',
144  journal='Methods Mol Biol', volume=2022, page_range=(353, 377),
145  year=2019,
146  authors=['Saltzberg D', 'Greenberg CH', 'Viswanath S',
147  'Chemmama I', 'Webb B', 'Pellarin R', 'Echeverria I',
148  'Sali A'],
149  doi='10.1007/978-1-4939-9608-7_15')
150 
151  def set_modeller_used(self, version, date):
152  if self.modeller_used:
153  return
154  self.modeller_used = True
155  s = ihm.Software(
156  name='MODELLER', classification='comparative modeling',
157  description='Comparative modeling by satisfaction '
158  'of spatial restraints, build ' + date,
159  location='https://salilab.org/modeller/', version=version)
160  self.system.software.append(s)
161  if hasattr(s, 'citation'):
162  s.citation = ihm.Citation(
163  pmid='8254673',
164  title='Comparative protein modelling by satisfaction of '
165  'spatial restraints.',
166  journal='J Mol Biol', volume=234, page_range=(779, 815),
167  year=1993, authors=['Sali A', 'Blundell TL'],
168  doi='10.1006/jmbi.1993.1626')
169 
170  def set_phyre2_used(self):
171  if self.phyre2_used:
172  return
173  self.phyre2_used = True
174  s = ihm.Software(
175  name='Phyre2', classification='protein homology modeling',
176  description='Protein Homology/analogY Recognition Engine V 2.0',
177  version='2.0', location='http://www.sbg.bio.ic.ac.uk/~phyre2/')
178  if hasattr(s, 'citation'):
179  s.citation = ihm.Citation(
180  pmid='25950237',
181  title='The Phyre2 web portal for protein modeling, '
182  'prediction and analysis.',
183  journal='Nat Protoc', volume=10, page_range=(845, 858),
184  authors=['Kelley LA', 'Mezulis S', 'Yates CM', 'Wass MN',
185  'Sternberg MJ'],
186  year=2015, doi='10.1038/nprot.2015.053')
187  self.system.software.append(s)
188 
189 def _get_fragment_is_rigid(fragment):
190  """Determine whether a fragment is modeled rigidly"""
191  leaves = IMP.atom.get_leaves(fragment.hier)
192  # Assume all leaves are set up as rigid/flexible in the same way
193  return IMP.core.RigidMember.get_is_setup(leaves[0])
194 
195 class _PDBFragment(ihm.representation.ResidueSegment):
196  """Record details about part of a PDB file used as input
197  for a component."""
198  def __init__(self, state, component, start, end, pdb_offset,
199  pdbname, chain, hier, asym_unit):
200  # start, end are PMI residue indexes (not IHM)
201  super(_PDBFragment, self).__init__(
202  asym_unit=asym_unit.pmi_range(start, end),
203  rigid=None, primitive='sphere')
204  self.component, self.start, self.end, self.offset, self.pdbname \
205  = component, start, end, pdb_offset, pdbname
206  self.state, self.chain, self.hier = state, chain, hier
209  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
210 
211  rigid = property(lambda self: _get_fragment_is_rigid(self),
212  lambda self, val: None)
213 
214  def combine(self, other):
215  pass
216 
217 
218 class _BeadsFragment(ihm.representation.FeatureSegment):
219  """Record details about beads used to represent part of a component."""
220  chain = None
221  def __init__(self, state, component, start, end, count, hier, asym_unit):
222  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
223  rigid=None, primitive='sphere', count=count)
224  self.state, self.component, self.hier \
225  = state, component, hier
226 
227  rigid = property(lambda self: _get_fragment_is_rigid(self),
228  lambda self, val: None)
229 
230  def combine(self, other):
231  # todo: don't combine if one fragment is rigid and the other flexible
232  if type(other) == type(self) and \
233  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
234  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
235  other.asym_unit.seq_id_range[1])
236  self.count += other.count
237  return True
238 
239 
240 class _AllModelRepresentations(object):
241  def __init__(self, simo):
242  self.simo = simo
243  # dict of fragments, ordered by representation, then component name,
244  # then state
245  self.fragments = OrderedDict()
246  self._all_representations = {}
247 
248  def copy_component(self, state, name, original, asym_unit):
249  """Copy all representation for `original` in `state` to `name`"""
250  def copy_frag(f):
251  newf = copy.copy(f)
252  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
253  return newf
254  for rep in self.fragments:
255  if original in self.fragments[rep]:
256  if name not in self.fragments[rep]:
257  self.fragments[rep][name] = OrderedDict()
258  self.fragments[rep][name][state] = [copy_frag(f)
259  for f in self.fragments[rep][original][state]]
260  # Assume representation for a component is the same in all
261  # states, so only write out the first one
262  first_state = list(self.fragments[rep][name].keys())[0]
263  if state is first_state:
264  representation = self._all_representations[rep]
265  representation.extend(self.fragments[rep][name][state])
266 
267  def add_fragment(self, state, representation, fragment):
268  """Add a model fragment."""
269  comp = fragment.component
270  id_rep = id(representation)
271  self._all_representations[id_rep] = representation
272  if id_rep not in self.fragments:
273  self.fragments[id_rep] = OrderedDict()
274  if comp not in self.fragments[id_rep]:
275  self.fragments[id_rep][comp] = OrderedDict()
276  if state not in self.fragments[id_rep][comp]:
277  self.fragments[id_rep][comp][state] = []
278  fragments = self.fragments[id_rep][comp][state]
279  if len(fragments) == 0 or not fragments[-1].combine(fragment):
280  fragments.append(fragment)
281  # Assume representation for a component is the same in all states,
282  # so only write out the first one
283  first_state = list(self.fragments[id_rep][comp].keys())[0]
284  if state is first_state:
285  representation.append(fragment)
286 
287 
288 class _AllDatasets(object):
289  """Track all datasets generated by PMI and add them to the ihm.System"""
290  def __init__(self, system):
291  self.system = system
292  self._datasets = []
293  self._datasets_by_state = {}
294  self._restraints_by_state = {}
295 
296  def get_all_group(self, state):
297  """Get a DatasetGroup encompassing all datasets so far in this state"""
298  # Note that if a restraint dataset is replaced after this is called,
299  # the group will still contain the old dataset - mark dataset as read
300  # only?
301  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
302  + [r.dataset for r in self._restraints_by_state.get(state, [])
303  if r.dataset])
304  return g
305 
306  def add(self, state, dataset):
307  """Add a new dataset."""
308  self._datasets.append(dataset)
309  if state not in self._datasets_by_state:
310  self._datasets_by_state[state] = []
311  self._datasets_by_state[state].append(dataset)
312  # Make sure that the dataset ends up in the mmCIF file even if nothing
313  # refers to it
314  self.system.orphan_datasets.append(dataset)
315  return dataset
316 
317  def add_restraint(self, state, restraint):
318  """Add the dataset for a restraint"""
319  if state not in self._restraints_by_state:
320  self._restraints_by_state[state] = []
321  self._restraints_by_state[state].append(restraint)
322 
323 
324 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
325  """Restrain to a set of cross links"""
326 
327  assembly = None
328  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
329  _descriptor_map = {'DSS': ihm.cross_linkers.dss,
330  'EDC': ihm.cross_linkers.edc}
331 
332  def __init__(self, pmi_restraint):
333  self.pmi_restraint = pmi_restraint
334  label = self.pmi_restraint.label
335  # Map commonly-used subtypes to more standard labels
336  label = self._label_map.get(label, label)
337  self.label = label
338  super(_CrossLinkRestraint, self).__init__(
339  dataset=self.pmi_restraint.dataset,
340  linker=self._get_chem_descriptor(label))
341 
342  @classmethod
343  def _get_chem_descriptor(cls, label):
344  if label not in cls._descriptor_map:
345  # If label is not a standard linker type, make a new chemical
346  # descriptor containing just the name. We don't know the chemistry
347  # so cannot specify a SMILES or INCHI string for it at this point
348  d = ihm.ChemDescriptor(label)
349  cls._descriptor_map[label] = d
350  return cls._descriptor_map[label]
351 
352  def _set_psi_sigma(self, model):
353  old_values = []
354  # Do nothing if given a different state than the restraint applies to
355  if model.m != self.pmi_restraint.m:
356  return
357  for resolution in self.pmi_restraint.sigma_dictionary:
358  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
359  if model.stats and statname in model.stats:
360  sigma = float(model.stats[statname])
361  p = self.pmi_restraint.sigma_dictionary[resolution][0]
362  old_values.append((p, p.get_scale()))
363  p.set_scale(sigma)
364  for psiindex in self.pmi_restraint.psi_dictionary:
365  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
366  if model.stats and statname in model.stats:
367  psi = float(model.stats[statname])
368  p = self.pmi_restraint.psi_dictionary[psiindex][0]
369  old_values.append((p, p.get_scale()))
370  p.set_scale(psi)
371  # If the same particle is set multiple times, make sure we get the
372  # original value last
373  return list(reversed(old_values))
374 
375  def add_fits_from_model_statfile(self, model):
376  # Set psi/sigma for all particles
377  old_values = self._set_psi_sigma(model)
378  # If no stats were collected, we can't show the fit
379  if not old_values:
380  return
381  for xl in self.cross_links:
382  # Get current psi/sigma particle value for each XL
383  xl.fits[model] = ihm.restraint.CrossLinkFit(
384  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
385  # Restore original psi/sigma
386  for p, scale in old_values:
387  p.set_scale(scale)
388 
389  # Have our dataset point to that in the original PMI restraint
390  def __set_dataset(self, val):
391  self.pmi_restraint.dataset = val
392  dataset = property(lambda self: self.pmi_restraint.dataset,
393  __set_dataset)
394 
395 
396 def get_asym_mapper_for_state(simo, state, asym_map):
397  asym = asym_map.get(state, None)
398  if asym is None:
399  asym = _AsymMapper(simo, state.prot)
400  asym_map[state] = asym
401  return asym
402 
403 class _PMICrossLink(object):
404  # Query PMI particles to get psi and sigma values at time of use, not
405  # time of construction (since they may be set after creating the restraint)
406  psi = property(lambda self: self.psi_p.get_scale(),
407  lambda self, val: None)
408  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
409  lambda self, val: None)
410  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
411  lambda self, val: None)
412 
413 
414 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
415  pass
416 
417 
418 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
419  pass
420 
421 
422 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
423  def __init__(self, state, pmi_restraint, image_number, resolution,
424  pixel_size, image_resolution, projection_number,
425  micrographs_number):
426  self.pmi_restraint, self.image_number = pmi_restraint, image_number
427  super(_EM2DRestraint, self).__init__(
428  dataset=pmi_restraint.datasets[image_number],
429  assembly=state.modeled_assembly,
430  segment=False, number_raw_micrographs=micrographs_number,
431  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
432  image_resolution=image_resolution,
433  number_of_projections=projection_number)
434 
435  # Have our dataset point to that in the original PMI restraint
436  def __get_dataset(self):
437  return self.pmi_restraint.datasets[self.image_number]
438  def __set_dataset(self, val):
439  self.pmi_restraint.datasets[self.image_number] = val
440  dataset = property(__get_dataset, __set_dataset)
441 
442  def add_fits_from_model_statfile(self, model):
443  ccc = self._get_cross_correlation(model)
444  transform = self._get_transformation(model)
445  rot = transform.get_rotation()
446  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
447  self.fits[model] = ihm.restraint.EM2DRestraintFit(
448  cross_correlation_coefficient=ccc,
449  rot_matrix=rm,
450  tr_vector=transform.get_translation())
451 
452  def _get_transformation(self, model):
453  """Get the transformation that places the model on the image"""
454  stats = model.em2d_stats or model.stats
455  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
456  self.image_number + 1)
457  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
458  t = [float(stats[prefix + '_Translation%d' % i])
459  for i in range(3)]
460  # If the model coordinates are transformed, need to back transform
461  # them first
462  inv = model.transform.get_inverse()
464  IMP.algebra.Vector3D(*t)) * inv
465 
466  def _get_cross_correlation(self, model):
467  """Get the cross correlation coefficient between the model projection
468  and the image"""
469  stats = model.em2d_stats or model.stats
470  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
471  % (self.pmi_restraint.label,
472  self.image_number + 1)])
473 
474 
475 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
476 
477  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
478  self.pmi_restraint = pmi_restraint
479  super(_EM3DRestraint, self).__init__(
480  dataset=pmi_restraint.dataset,
481  assembly=self._get_assembly(densities, simo, state),
482  fitting_method='Gaussian mixture models',
483  number_of_gaussians=len(target_ps))
484 
485  # Have our dataset point to that in the original PMI restraint
486  def __set_dataset(self, val):
487  self.pmi_restraint.dataset = val
488  dataset = property(lambda self: self.pmi_restraint.dataset,
489  __set_dataset)
490 
491  def _get_assembly(self, densities, simo, state):
492  """Get the Assembly that this restraint acts on"""
493  cm = _ComponentMapper(state.prot)
494  components = {}
495  for d in densities:
496  components[cm[d]] = None # None == all residues in this component
497  a = simo._get_subassembly(components,
498  name="EM subassembly",
499  description="All components that fit the EM map")
500  return a
501 
502  def add_fits_from_model_statfile(self, model):
503  ccc = self._get_cross_correlation(model)
504  self.fits[model] = ihm.restraint.EM3DRestraintFit(
505  cross_correlation_coefficient=ccc)
506 
507  def _get_cross_correlation(self, model):
508  """Get the cross correlation coefficient between the model
509  and the map"""
510  if model.stats is not None:
511  return float(model.stats['GaussianEMRestraint_%s_CCC'
512  % self.pmi_restraint.label])
513 
514 
515 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
516 
517  def __init__(self, simo, state, pmi_restraint, geometric_object,
518  feature, distance, sigma):
519  self.pmi_restraint = pmi_restraint
520  super(_GeometricRestraint, self).__init__(
521  dataset=pmi_restraint.dataset,
522  geometric_object=geometric_object, feature=feature,
523  distance=distance, harmonic_force_constant = 1. / sigma,
524  restrain_all=True)
525 
526  # Have our dataset point to that in the original PMI restraint
527  def __set_dataset(self, val):
528  self.pmi_restraint.dataset = val
529  dataset = property(lambda self: self.pmi_restraint.dataset,
530  __set_dataset)
531 
532 
533 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
534  def __init__(self, state, rex):
535  if rex.monte_carlo_sample_objects is not None:
536  method = 'Replica exchange monte carlo'
537  else:
538  method = 'Replica exchange molecular dynamics'
539  super(_ReplicaExchangeProtocolStep, self).__init__(
540  assembly=state.modeled_assembly,
541  dataset_group=None, # filled in by add_step()
542  method=method, name='Sampling',
543  num_models_begin=None, # filled in by add_step()
544  num_models_end=rex.vars["number_of_frames"],
545  multi_scale=True, multi_state=False, ordered=False,
546  ensemble=True)
547 
548 
549 class _SimpleProtocolStep(ihm.protocol.Step):
550  def __init__(self, state, num_models_end, method):
551  super(_SimpleProtocolStep, self).__init__(
552  assembly=state.modeled_assembly,
553  dataset_group=None, # filled in by add_step()
554  method=method, name='Sampling',
555  num_models_begin=None, # filled in by add_step()
556  num_models_end=num_models_end,
557  multi_scale=True, multi_state=False, ordered=False,
558  ensemble=True)
559 
560 
561 class _Chain(object):
562  """Represent a single chain in a Model"""
563  def __init__(self, pmi_chain_id, asym_unit):
564  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
565  self.spheres = []
566  self.atoms = []
567 
568  def add(self, xyz, atom_type, residue_type, residue_index,
569  all_indexes, radius):
570  if atom_type is None:
571  self.spheres.append((xyz, residue_type, residue_index,
572  all_indexes, radius))
573  else:
574  self.atoms.append((xyz, atom_type, residue_type, residue_index,
575  all_indexes, radius))
576  orig_comp = property(lambda self: self.comp)
577 
578 class _TransformedChain(object):
579  """Represent a chain that is a transformed version of another"""
580  def __init__(self, orig_chain, asym_unit, transform):
581  self.orig_chain, self.asym_unit = orig_chain, asym_unit
582  self.transform = transform
583 
584  def __get_spheres(self):
585  for (xyz, residue_type, residue_index, all_indexes,
586  radius) in self.orig_chain.spheres:
587  yield (self.transform * xyz, residue_type, residue_index,
588  all_indexes, radius)
589  spheres = property(__get_spheres)
590 
591  def __get_atoms(self):
592  for (xyz, atom_type, residue_type, residue_index, all_indexes,
593  radius) in self.orig_chain.atoms:
594  yield (self.transform * xyz, atom_type, residue_type, residue_index,
595  all_indexes, radius)
596  atoms = property(__get_atoms)
597 
598  entity = property(lambda self: self.orig_chain.entity)
599  orig_comp = property(lambda self: self.orig_chain.comp)
600 
601 class _Excluder(object):
602  def __init__(self, component, simo):
603  self._seqranges = simo._exclude_coords.get(component, [])
604 
605  def is_excluded(self, indexes):
606  """Return True iff the given sequence range is excluded."""
607  for seqrange in self._seqranges:
608  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
609  return True
610 
611 
612 class _Model(ihm.model.Model):
613  def __init__(self, prot, simo, protocol, assembly, representation):
614  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
615  representation=representation)
616  self.simo = weakref.proxy(simo)
617  # Transformation from IMP coordinates into mmCIF coordinate system.
618  # Normally we pass through coordinates unchanged, but in some cases
619  # we may want to translate them (e.g. Nup84, where the deposited PDB
620  # files are all centered; we want the mmCIF files to match)
622  self.em2d_stats = None
623  self.stats = None
624  # True iff restraints act on this model
625  self._is_restrained = True
626  o = self.output = IMP.pmi1.output.Output(atomistic=True)
627  name = 'cif-output'
628  self.m = prot.get_model()
629  o.dictionary_pdbs[name] = prot
630  o._init_dictchain(name, prot, multichar_chain=True)
631  (particle_infos_for_pdb,
632  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
633  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
634  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
635  self.rmsf = {}
636 
637  def all_chains(self, simo):
638  """Yield all chains, including transformed ones"""
639  chain_for_comp = {}
640  for c in self.chains:
641  yield c
642  chain_for_comp[c.comp] = c
643  for tc in simo._transformed_components:
644  orig_chain = chain_for_comp.get(tc.original, None)
645  if orig_chain:
646  asym = simo.asym_units[tc.name]
647  c = _TransformedChain(orig_chain, asym, tc.transform)
648  c.comp = tc.name
649  yield c
650 
651  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
652  entity_for_chain = {}
653  comp_for_chain = {}
654  correct_asym = {}
655  for protname, chain_id in o.dictchain[name].items():
656  entity_for_chain[chain_id] = simo.entities[protname]
657  comp_for_chain[chain_id] = protname
658  # When doing multi-state modeling, the chain ID returned here
659  # (assigned sequentially) might not be correct (states may have
660  # gaps in the chain IDs). Map it to the correct asym unit.
661  correct_asym[chain_id] = simo.asym_units[protname]
662 
663  # Gather by chain ID (should be sorted by chain ID already)
664  self.chains = []
665  chain = None
666  excluder = None
667 
668  for (xyz, atom_type, residue_type, chain_id, residue_index,
669  all_indexes, radius) in particle_infos_for_pdb:
670  if chain is None or chain.pmi_chain_id != chain_id:
671  chain = _Chain(chain_id, correct_asym[chain_id])
672  chain.entity = entity_for_chain[chain_id]
673  chain.comp = comp_for_chain[chain_id]
674  self.chains.append(chain)
675  excluder = _Excluder(chain.comp, simo)
676  if not excluder.is_excluded(all_indexes if all_indexes
677  else [residue_index]):
678  chain.add(xyz, atom_type, residue_type, residue_index,
679  all_indexes, radius)
680 
681  def parse_rmsf_file(self, fname, component):
682  self.rmsf[component] = rmsf = {}
683  with open(fname) as fh:
684  for line in fh:
685  resnum, blocknum, val = line.split()
686  rmsf[int(resnum)] = (int(blocknum), float(val))
687 
688  def get_rmsf(self, component, indexes):
689  """Get the RMSF value for the given residue indexes."""
690  if not self.rmsf:
691  return None
692  rmsf = self.rmsf[component]
693  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
694  if len(blocknums) != 1:
695  raise ValueError("Residue indexes %s aren't all in the same block"
696  % str(indexes))
697  return rmsf[indexes[0]][1]
698 
699  def get_atoms(self):
700  for chain in self.all_chains(self.simo):
701  pmi_offset = chain.asym_unit.entity.pmi_offset
702  for atom in chain.atoms:
703  (xyz, atom_type, residue_type, residue_index,
704  all_indexes, radius) = atom
705  pt = self.transform * xyz
706  yield ihm.model.Atom(asym_unit=chain.asym_unit,
707  seq_id=residue_index - pmi_offset,
708  atom_id=atom_type.get_string(),
709  type_symbol=None, # todo: get element
710  x=pt[0], y=pt[1], z=pt[2])
711 
712  def get_spheres(self):
713  for chain in self.all_chains(self.simo):
714  pmi_offset = chain.asym_unit.entity.pmi_offset
715  for sphere in chain.spheres:
716  (xyz, residue_type, residue_index,
717  all_indexes, radius) = sphere
718  if all_indexes is None:
719  all_indexes = (residue_index,)
720  pt = self.transform * xyz
721  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
722  seq_id_range=(all_indexes[0] - pmi_offset,
723  all_indexes[-1] - pmi_offset),
724  x=pt[0], y=pt[1], z=pt[2], radius=radius,
725  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
726 
727 
728 class _AllProtocols(object):
729  def __init__(self, simo):
730  self.simo = weakref.proxy(simo)
731  # Lists of Protocols by state
732  self.protocols = OrderedDict()
733 
734  def add_protocol(self, state):
735  """Add a new Protocol"""
736  if state not in self.protocols:
737  self.protocols[state] = []
738  p = ihm.protocol.Protocol()
739  self.simo.system.orphan_protocols.append(p)
740  self.protocols[state].append(p)
741 
742  def add_step(self, step, state):
743  """Add a ProtocolStep to the last Protocol of the given State"""
744  if state not in self.protocols:
745  self.add_protocol(state)
746  protocol = self.get_last_protocol(state)
747  if len(protocol.steps) == 0:
748  step.num_models_begin = 0
749  else:
750  step.num_models_begin = protocol.steps[-1].num_models_end
751  protocol.steps.append(step)
752  step.id = len(protocol.steps)
753  # Assume that protocol uses all currently-defined datasets
754  step.dataset_group = self.simo.all_datasets.get_all_group(state)
755 
756  def add_postproc(self, step, state):
757  """Add a postprocessing step to the last protocol"""
758  protocol = self.get_last_protocol(state)
759  if len(protocol.analyses) == 0:
760  protocol.analyses.append(ihm.analysis.Analysis())
761  protocol.analyses[-1].steps.append(step)
762 
763  def get_last_protocol(self, state):
764  """Return the most recently-added _Protocol"""
765  return self.protocols[state][-1]
766 
767 
768 class _AllStartingModels(object):
769  def __init__(self, simo):
770  self.simo = simo
771  # dict of starting models (entire PDB files), collected from fragments,
772  # ordered by component name and state
773  self.models = OrderedDict()
774  self.output = IMP.pmi1.output.Output()
775 
776  def add_pdb_fragment(self, fragment):
777  """Add a starting model PDB fragment."""
778  comp = fragment.component
779  state = fragment.state
780  if comp not in self.models:
781  self.models[comp] = OrderedDict()
782  if state not in self.models[comp]:
783  self.models[comp][state] = []
784  models = self.models[comp][state]
785  if len(models) == 0 \
786  or models[-1].fragments[0].pdbname != fragment.pdbname:
787  model = self._add_model(fragment)
788  models.append(model)
789  else:
790  # Break circular ref between fragment and model
791  models[-1].fragments.append(weakref.proxy(fragment))
792  # Update residue range to cover all fragments
793  pmi_offset = models[-1].asym_unit.entity.pmi_offset
794  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
795  models[-1].asym_unit.seq_id_range[0])
796  sid_end = max(fragment.end + fragment.offset - pmi_offset,
797  models[-1].asym_unit.seq_id_range[1])
798  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
799  fragment.starting_model = models[-1]
800 
801  def _add_model(self, f):
802  parser = ihm.metadata.PDBParser()
803  r = parser.parse_file(f.pdbname)
804 
805  self.simo._add_dataset(r['dataset'])
806  # We only want the templates that model the starting model chain
807  templates = r['templates'].get(f.chain, [])
808  for t in templates:
809  if t.alignment_file:
810  self.simo.system.locations.append(t.alignment_file)
811  if t.dataset:
812  self.simo._add_dataset(t.dataset)
813  pmi_offset = f.asym_unit.entity.pmi_offset
814  m = _StartingModel(
815  asym_unit=f.asym_unit.asym.pmi_range(f.start, f.end),
816  dataset=r['dataset'], asym_id=f.chain,
817  templates=templates, offset=f.offset + pmi_offset,
818  metadata=r['metadata'],
819  software=r['software'][0] if r['software'] else None,
820  script_file=r['script'])
821  m.fragments = [weakref.proxy(f)]
822  return m
823 
824 
825 class _StartingModel(ihm.startmodel.StartingModel):
826  def get_seq_dif(self):
827  return self._seq_dif
828 
829  def get_atoms(self):
830  pmi_offset = self.asym_unit.entity.pmi_offset
831  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
832  self.asym_unit)
833  for f in self.fragments:
834  sel = IMP.atom.Selection(f.starting_hier,
835  residue_indexes=list(range(f.start - f.offset,
836  f.end - f.offset + 1)))
837  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
838  f.offset - pmi_offset):
839  yield a
840  self._seq_dif = mh._seq_dif
841 
842 
843 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
844  """Post processing using AnalysisReplicaExchange0 macro"""
845 
846  def __init__(self, rex, num_models_begin):
847  self.rex = rex
848  num_models_end = 0
849  for fname in self.get_all_stat_files():
850  with open(fname) as fh:
851  num_models_end += len(fh.readlines())
852  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
853  feature='RMSD', num_models_begin=num_models_begin,
854  num_models_end=num_models_end)
855 
856  def get_stat_file(self, cluster_num):
857  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
858  'stat.out')
859 
860  def get_all_stat_files(self):
861  for i in range(self.rex._number_of_clusters):
862  yield self.get_stat_file(i)
863 
864 
865 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
866  """Ensemble generated using AnalysisReplicaExchange0 macro"""
867 
868  num_models_deposited = None # Override base class property
869 
870  def __init__(self, pp, cluster_num, model_group, num_deposit):
871  with open(pp.get_stat_file(cluster_num)) as fh:
872  num_models = len(fh.readlines())
873  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
874  num_models=num_models,
875  model_group=model_group, post_process=pp,
876  clustering_feature=pp.feature,
877  name=model_group.name)
878  self.cluster_num = cluster_num
879  self.num_models_deposited = num_deposit
880 
881  def get_rmsf_file(self, component):
882  return os.path.join(self.post_process.rex._outputdir,
883  'cluster.%d' % self.cluster_num,
884  'rmsf.%s.dat' % component)
885 
886  def load_rmsf(self, model, component):
887  fname = self.get_rmsf_file(component)
888  if os.path.exists(fname):
889  model.parse_rmsf_file(fname, component)
890 
891  def get_localization_density_file(self, fname):
892  return os.path.join(self.post_process.rex._outputdir,
893  'cluster.%d' % self.cluster_num,
894  '%s.mrc' % fname)
895 
896  def load_localization_density(self, state, fname, select_tuple, asym_units):
897  fullpath = self.get_localization_density_file(fname)
898  if os.path.exists(fullpath):
899  details = "Localization density for %s %s" \
900  % (fname, self.model_group.name)
901  local_file = ihm.location.OutputFileLocation(fullpath,
902  details=details)
903  for s in select_tuple:
904  if isinstance(s, tuple) and len(s) == 3:
905  asym = asym_units[s[2]].pmi_range(s[0], s[1])
906  else:
907  asym = asym_units[s]
908  den = ihm.model.LocalizationDensity(file=local_file,
909  asym_unit=asym)
910  self.densities.append(den)
911 
912  def load_all_models(self, simo, state):
913  stat_fname = self.post_process.get_stat_file(self.cluster_num)
914  model_num = 0
915  with open(stat_fname) as fh:
916  stats = ast.literal_eval(fh.readline())
917  # Correct path
918  rmf_file = os.path.join(os.path.dirname(stat_fname),
919  "%d.rmf3" % model_num)
920  for c in state.all_modeled_components:
921  # todo: this only works with PMI 1
922  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
923  force_rigid_update=True)
924  # todo: fill in other data from stat file, e.g. crosslink phi/psi
925  yield stats
926  model_num += 1
927  if model_num >= self.num_models_deposited:
928  return
929 
930  # todo: also support dRMS precision
931  def _get_precision(self):
932  precfile = os.path.join(self.post_process.rex._outputdir,
933  "precision.%d.%d.out" % (self.cluster_num,
934  self.cluster_num))
935  if not os.path.exists(precfile):
936  return ihm.unknown
937  # Fail if the precision.x.x.out file doesn't match the cluster
938  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
939  % self.cluster_num)
940  with open(precfile) as fh:
941  for line in fh:
942  m = r.match(line)
943  if m:
944  return float(m.group(1))
945 
946  precision = property(lambda self: self._get_precision(),
947  lambda self, val: None)
948 
949 class _SimpleEnsemble(ihm.model.Ensemble):
950  """Simple manually-created ensemble"""
951 
952  num_models_deposited = None # Override base class property
953 
954  def __init__(self, pp, model_group, num_models, drmsd,
955  num_models_deposited, ensemble_file):
956  super(_SimpleEnsemble, self).__init__(
957  model_group=model_group, post_process=pp, num_models=num_models,
958  file=ensemble_file, precision=drmsd, name=model_group.name,
959  clustering_feature='dRMSD')
960  self.num_models_deposited = num_models_deposited
961 
962  def load_localization_density(self, state, component, asym, local_file):
963  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
964  self.densities.append(den)
965 
966 
967 class _EntityMapper(dict):
968  """Handle mapping from IMP components to CIF entities.
969  Multiple components may map to the same entity if they share sequence."""
970  def __init__(self, system):
971  super(_EntityMapper, self).__init__()
972  self._sequence_dict = {}
973  self._entities = []
974  self.system = system
975 
976  def add(self, component_name, sequence, offset):
977  def entity_seq(sequence):
978  # Map X to UNK
979  if 'X' in sequence:
980  return ['UNK' if s == 'X' else s for s in sequence]
981  else:
982  return sequence
983  if sequence not in self._sequence_dict:
984  # Use the name of the first component, stripped of any copy number,
985  # as the description of the entity
986  d = component_name.split("@")[0].split(".")[0]
987  entity = Entity(entity_seq(sequence), description=d,
988  pmi_offset=offset)
989  self.system.entities.append(entity)
990  self._sequence_dict[sequence] = entity
991  self[component_name] = self._sequence_dict[sequence]
992 
993 
994 class _TransformedComponent(object):
995  def __init__(self, name, original, transform):
996  self.name, self.original, self.transform = name, original, transform
997 
998 
999 class _SimpleRef(object):
1000  """Class with similar interface to weakref.ref, but keeps a strong ref"""
1001  def __init__(self, ref):
1002  self.ref = ref
1003  def __call__(self):
1004  return self.ref
1005 
1006 
1007 class _State(ihm.model.State):
1008  """Representation of a single state in the system."""
1009 
1010  def __init__(self, pmi_object, po):
1011  # Point to the PMI object for this state. Use a weak reference
1012  # since the state object typically points to us too, so we need
1013  # to break the reference cycle. In PMI1 this will be a
1014  # Representation object; in PMI2 it is the PMI2 State object itself.
1015  self._pmi_object = weakref.proxy(pmi_object)
1016  if hasattr(pmi_object, 'state'):
1017  # Need a strong ref to pmi_object.state to prevent it from being
1018  # cleaned up when doing PMI1 multistate modeling
1019  self._pmi_state = _SimpleRef(pmi_object.state)
1020  else:
1021  self._pmi_state = weakref.ref(pmi_object)
1022  # Preserve PMI state name
1023  old_name = self.name
1024  super(_State, self).__init__(experiment_type='Fraction of bulk')
1025  self.name = old_name
1026 
1027  # The assembly of all components modeled by IMP in this state.
1028  # This may be smaller than the complete assembly.
1029  self.modeled_assembly = ihm.Assembly(
1030  name="Modeled assembly",
1031  description=self.get_postfixed_name(
1032  "All components modeled by IMP"))
1033  po.system.orphan_assemblies.append(self.modeled_assembly)
1034 
1035  self.all_modeled_components = []
1036 
1037  def __hash__(self):
1038  return hash(self._pmi_state())
1039  def __eq__(self, other):
1040  return self._pmi_state() == other._pmi_state()
1041 
1042  def add_model_group(self, group):
1043  self.append(group)
1044  return group
1045 
1046  def get_prefixed_name(self, name):
1047  """Prefix the given name with the state name, if available."""
1048  if self.short_name:
1049  return self.short_name + ' ' + name
1050  else:
1051  # Can't use capitalize() since that un-capitalizes everything
1052  # but the first letter
1053  return name[0].upper() + name[1:] if name else ''
1054 
1055  def get_postfixed_name(self, name):
1056  """Postfix the given name with the state name, if available."""
1057  if self.short_name:
1058  return "%s in state %s" % (name, self.short_name)
1059  else:
1060  return name
1061 
1062  short_name = property(lambda self: self._pmi_state().short_name)
1063  long_name = property(lambda self: self._pmi_state().long_name)
1064  def __get_name(self):
1065  return self._pmi_state().long_name
1066  def __set_name(self, val):
1067  self._pmi_state().long_name = val
1068  name = property(__get_name, __set_name)
1069 
1070 
1071 class Entity(ihm.Entity):
1072  """A single entity in the system.
1073  This functions identically to the base ihm.Entity class, but it
1074  allows identifying residues by either the IHM numbering scheme
1075  (seq_id, which is always contiguous starting at 1) or by the PMI
1076  scheme (which is similar, but may not start at 1 if the sequence in
1077  the FASTA file has one or more N-terminal gaps"""
1078  def __init__(self, sequence, pmi_offset, *args, **keys):
1079  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1080  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1081  self.pmi_offset = pmi_offset
1082  super(Entity, self).__init__(sequence, *args, **keys)
1083 
1084  def pmi_residue(self, res_id):
1085  """Return a single IHM residue indexed using PMI numbering"""
1086  return self.residue(res_id - self.pmi_offset)
1087 
1088  def pmi_range(self, res_id_begin, res_id_end):
1089  """Return a range of IHM residues indexed using PMI numbering"""
1090  off = self.pmi_offset
1091  return self(res_id_begin - off, res_id_end - off)
1092 
1093 
1094 class AsymUnit(ihm.AsymUnit):
1095  """A single asymmetric unit in the system.
1096  This functions identically to the base ihm.AsymUnit class, but it
1097  allows identifying residues by either the IHM numbering scheme
1098  (seq_id, which is always contiguous starting at 1) or by the PMI
1099  scheme (which is similar, but may not start at 1 if the sequence in
1100  the FASTA file has one or more N-terminal gaps"""
1101 
1102  def __init__(self, entity, *args, **keys):
1103  super(AsymUnit, self).__init__(
1104  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1105 
1106  def pmi_residue(self, res_id):
1107  """Return a single IHM residue indexed using PMI numbering"""
1108  return self.residue(res_id - self.entity.pmi_offset)
1109 
1110  def pmi_range(self, res_id_begin, res_id_end):
1111  """Return a range of IHM residues indexed using PMI numbering"""
1112  off = self.entity.pmi_offset
1113  return self(res_id_begin - off, res_id_end - off)
1114 
1115 
1117  """Class to encode a modeling protocol as mmCIF.
1118 
1119  IMP has basic support for writing out files in mmCIF format, for
1120  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1121  After creating an instance of this class, attach it to an
1122  IMP.pmi1.representation.Representation object. After this, any
1123  generated models and metadata are automatically collected and
1124  output as mmCIF.
1125  """
1126  def __init__(self, fh):
1127  # Ultimately, collect data in an ihm.System object
1128  self.system = ihm.System()
1129  self._state_group = ihm.model.StateGroup()
1130  self.system.state_groups.append(self._state_group)
1131 
1132  self.fh = fh
1133  self._state_ensemble_offset = 0
1134  self._each_metadata = [] # list of metadata for each representation
1135  self._file_datasets = []
1136  self._main_script = os.path.abspath(sys.argv[0])
1137 
1138  # Point to the main modeling script
1139  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1140  details="The main integrative modeling script")
1141  self.system.locations.append(loc)
1142 
1143  self._states = {}
1144  self.__asym_states = {}
1145  self._working_directory = os.getcwd()
1146  self.default_representation = self.create_representation(
1147  "Default representation")
1148  self.entities = _EntityMapper(self.system)
1149  # Mapping from component names to ihm.AsymUnit
1150  self.asym_units = {}
1151  self._all_components = {}
1152  self.all_modeled_components = []
1153  self._transformed_components = []
1154  self.sequence_dict = {}
1155 
1156  # Common geometric objects used in PMI systems
1157  self._xy_plane = ihm.geometry.XYPlane()
1158  self._xz_plane = ihm.geometry.XZPlane()
1159  self._z_axis = ihm.geometry.ZAxis()
1160  self._center_origin = ihm.geometry.Center(0,0,0)
1161  self._identity_transform = ihm.geometry.Transformation.identity()
1162 
1163  # Coordinates to exclude
1164  self._exclude_coords = {}
1165 
1166  self.all_representations = _AllModelRepresentations(self)
1167  self.all_protocols = _AllProtocols(self)
1168  self.all_datasets = _AllDatasets(self.system)
1169  self.all_starting_models = _AllStartingModels(self)
1170 
1171  self.all_software = _AllSoftware(self.system)
1172 
1173  def create_representation(self, name):
1174  """Create a new Representation and return it. This can be
1175  passed to add_model(), add_bead_element() or add_pdb_element()."""
1176  r = ihm.representation.Representation()
1177  self.system.orphan_representations.append(r)
1178  return r
1179 
1180  def exclude_coordinates(self, component, seqrange):
1181  """Don't record coordinates for the given domain.
1182  Coordinates for the given domain (specified by a component name
1183  and a 2-element tuple giving the start and end residue numbers)
1184  will be excluded from the mmCIF file. This can be used to exclude
1185  parts of the structure that weren't well resolved in modeling.
1186  Any bead or residue that lies wholly within this range will be
1187  excluded. Multiple ranges for a given component can be excluded
1188  by calling this method multiple times."""
1189  if component not in self._exclude_coords:
1190  self._exclude_coords[component] = []
1191  self._exclude_coords[component].append(seqrange)
1192 
1193  def _is_excluded(self, component, start, end):
1194  """Return True iff this chunk of sequence should be excluded"""
1195  for seqrange in self._exclude_coords.get(component, ()):
1196  if start >= seqrange[0] and end <= seqrange[1]:
1197  return True
1198 
1199  def _add_state(self, state):
1200  """Create a new state and return a pointer to it."""
1201  self._state_ensemble_offset = len(self.system.ensembles)
1202  s = _State(state, self)
1203  self._state_group.append(s)
1204  self._last_state = s
1205  return s
1206 
1207  def get_file_dataset(self, fname):
1208  for d in self._file_datasets:
1209  fd = d.get(os.path.abspath(fname), None)
1210  if fd:
1211  return fd
1212 
1213  def _get_chain_for_component(self, name, output):
1214  """Get the chain ID for a component, if any."""
1215  # todo: handle multiple copies
1216  if name in self.asym_units:
1217  return self.asym_units[name]._id
1218  else:
1219  # A non-modeled component doesn't have a chain ID
1220  return None
1221 
1222  def _get_assembly_comps(self, assembly):
1223  """Get the names of the components in the given assembly"""
1224  # component name = asym_unit.details
1225  comps = {}
1226  for ca in assembly:
1227  comps[ca.details] = None
1228  return comps
1229 
1230  def create_transformed_component(self, state, name, original, transform):
1231  """Make a new component that's a transformed copy of another.
1232  All representation for the existing component is copied to the
1233  new one."""
1234  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1235  if name in assembly_comps:
1236  raise ValueError("Component %s already exists" % name)
1237  elif original not in assembly_comps:
1238  raise ValueError("Original component %s does not exist" % original)
1239  self.create_component(state, name, True)
1240  self.add_component_sequence(state, name, self.sequence_dict[original])
1241  self._transformed_components.append(_TransformedComponent(
1242  name, original, transform))
1243  self.all_representations.copy_component(state, name, original,
1244  self.asym_units[name])
1245 
1246  def create_component(self, state, name, modeled):
1247  new_comp = name not in self._all_components
1248  self._all_components[name] = None
1249  if modeled:
1250  state.all_modeled_components.append(name)
1251  if new_comp:
1252  self.asym_units[name] = None # assign asym once we get sequence
1253  self.all_modeled_components.append(name)
1254 
1255  def add_component_sequence(self, state, name, seq):
1256  def get_offset(seq):
1257  # Count length of gaps at start of sequence
1258  for i in range(len(seq)):
1259  if seq[i] != '-':
1260  return seq[i:], i
1261  seq, offset = get_offset(seq)
1262  if name in self.sequence_dict:
1263  if self.sequence_dict[name] != seq:
1264  raise ValueError("Sequence mismatch for component %s" % name)
1265  else:
1266  self.sequence_dict[name] = seq
1267  self.entities.add(name, seq, offset)
1268  if name in self.asym_units:
1269  if self.asym_units[name] is None:
1270  # Set up a new asymmetric unit for this component
1271  entity = self.entities[name]
1272  asym = AsymUnit(entity, details=name)
1273  self.system.asym_units.append(asym)
1274  self.asym_units[name] = asym
1275  state.modeled_assembly.append(self.asym_units[name])
1276 
1277  def _add_restraint_model_fits(self):
1278  """Add fits to restraints for all known models"""
1279  for group, m in self.system._all_models():
1280  if m._is_restrained:
1281  for r in self.system.restraints:
1282  if hasattr(r, 'add_fits_from_model_statfile'):
1283  r.add_fits_from_model_statfile(m)
1284 
1285  def _add_em2d_raw_micrographs(self):
1286  """If the deprecated metadata.EMMicrographsDataset class was used,
1287  transfer its data to the EM2D restraint."""
1288  for r in self.system.restraints:
1289  if isinstance(r, _EM2DRestraint):
1290  for d in r.dataset.parents:
1291  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1292  r.number_raw_micrographs = d.number
1293  if isinstance(d.location,
1294  IMP.pmi1.metadata.FileLocation):
1295  d.location.content_type = \
1296  ihm.location.InputFileLocation.content_type
1297 
1298  def flush(self):
1299  self.finalize()
1300  ihm.dumper.write(self.fh, [self.system])
1301 
1302  def finalize(self):
1303  """Do any final processing on the class hierarchy.
1304  After calling this method, the `system` member (an instance
1305  of `ihm.System`) completely reproduces the PMI modeling, and
1306  can be written out to an mmCIF file with `ihm.dumper.write`,
1307  and/or modified using the ihm API."""
1308  self._add_restraint_model_fits()
1309 
1310  self._add_em2d_raw_micrographs()
1311 
1312  # Add metadata to ihm.System
1313  self.system.software.extend(m for m in self._metadata
1314  if isinstance(m, ihm.Software))
1315  self.system.citations.extend(m for m in self._metadata
1316  if isinstance(m, ihm.Citation))
1317  self.system.locations.extend(m for m in self._metadata
1318  if isinstance(m, ihm.location.FileLocation))
1319 
1320  # Point all locations to repos, if applicable
1321  all_repos = [m for m in self._metadata
1322  if isinstance(m, ihm.location.Repository)]
1323  self.system.update_locations_in_repositories(all_repos)
1324 
1325  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1326  chain, hier, representation=None):
1327  if self._is_excluded(name, start, end):
1328  return
1329  if representation is None:
1330  representation = self.default_representation
1331  asym = self.asym_units[name]
1332  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1333  hier, asym)
1334  self.all_representations.add_fragment(state, representation, p)
1335  self.all_starting_models.add_pdb_fragment(p)
1336 
1337  def add_bead_element(self, state, name, start, end, num, hier,
1338  representation=None):
1339  if self._is_excluded(name, start, end):
1340  return
1341  if representation is None:
1342  representation = self.default_representation
1343  asym = self.asym_units[name]
1344  pmi_offset = asym.entity.pmi_offset
1345  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1346  num, hier, asym)
1347  self.all_representations.add_fragment(state, representation, b)
1348 
1349  def get_cross_link_group(self, pmi_restraint):
1350  r = _CrossLinkRestraint(pmi_restraint)
1351  self.system.restraints.append(r)
1352  self._add_restraint_dataset(r) # so that all-dataset group works
1353  return r
1354 
1355  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1356  if c1 not in self._all_components or c2 not in self._all_components:
1357  # Crosslink refers to a component we didn't model
1358  # As a quick hack, just ignore it.
1359  # todo: need to add an entity for this anyway (so will need the
1360  # sequence, and add to struct_assembly)
1361  return None
1362  e1 = self.entities[c1]
1363  e2 = self.entities[c2]
1364  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1365  residue2=e2.pmi_residue(r2))
1366  rsr.experimental_cross_links.append([xl])
1367  return xl
1368 
1369  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1370  psi_p, rsr):
1371  # Map p1/p2 to asym units
1372  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1373  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1374 
1375  if _get_by_residue(p1) and _get_by_residue(p2):
1376  cls = _ResidueCrossLink
1377  else:
1378  cls = _FeatureCrossLink
1379  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1380  restrain_all=True)
1381  # Needed to look up fits later
1382  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1383  rsr.cross_links.append(xl)
1384 
1385  def add_replica_exchange(self, state, rex):
1386  # todo: allow for metadata to say how many replicas were used in the
1387  # actual experiment, and how many independent runs were carried out
1388  # (use these as multipliers to get the correct total number of
1389  # output models)
1390  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1391  state)
1392 
1393  def _add_simple_dynamics(self, num_models_end, method):
1394  # Always assumed that we're dealing with the last state
1395  state = self._last_state
1396  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1397  method), state)
1398 
1399  def _add_protocol(self):
1400  # Always assumed that we're dealing with the last state
1401  state = self._last_state
1402  self.all_protocols.add_protocol(state)
1403 
1404  def _add_dataset(self, dataset):
1405  return self.all_datasets.add(self._last_state, dataset)
1406 
1407  def _add_restraint_dataset(self, restraint):
1408  return self.all_datasets.add_restraint(self._last_state, restraint)
1409 
1410  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1411  # Always assumed that we're dealing with the last state
1412  state = self._last_state
1413  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1414  self.all_protocols.add_postproc(pp, state)
1415  return pp
1416 
1417  def _add_no_postprocessing(self, num_models):
1418  # Always assumed that we're dealing with the last state
1419  state = self._last_state
1420  pp = ihm.analysis.EmptyStep()
1421  pp.num_models_begin = pp.num_models_end = num_models
1422  self.all_protocols.add_postproc(pp, state)
1423  return pp
1424 
1425  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1426  num_models_deposited, localization_densities,
1427  ensemble_file):
1428  """Add an ensemble generated by ad hoc methods (not using PMI).
1429  This is currently only used by the Nup84 system."""
1430  # Always assumed that we're dealing with the last state
1431  state = self._last_state
1432  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1433  state.add_model_group(group)
1434  if ensemble_file:
1435  # Deprecated Location class does not state input vs output
1436  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1437  ensemble_file.content_type = \
1438  ihm.location.OutputFileLocation.content_type
1439  self.system.locations.append(ensemble_file)
1440  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1441  ensemble_file)
1442  self.system.ensembles.append(e)
1443  for c in state.all_modeled_components:
1444  den = localization_densities.get(c, None)
1445  if den:
1446  # Deprecated Location class does not state input vs output
1447  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1448  den.content_type = \
1449  ihm.location.OutputFileLocation.content_type
1450  e.load_localization_density(state, c, self.asym_units[c], den)
1451  return e
1452 
1453  def set_ensemble_file(self, i, location):
1454  """Point a previously-created ensemble to an 'all-models' file.
1455  This could be a trajectory such as DCD, an RMF, or a multimodel
1456  PDB file."""
1457  # Deprecated Location class does not state input vs output
1458  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1459  location.content_type = ihm.location.OutputFileLocation.content_type
1460  self.system.locations.append(location)
1461  # Ensure that we point to an ensemble related to the current state
1462  ind = i + self._state_ensemble_offset
1463  self.system.ensembles[ind].file = location
1464 
1465  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1466  # todo: add prefilter as an additional postprocess step (complication:
1467  # we don't know how many models it filtered)
1468  # todo: postpone rmsf/density localization extraction until after
1469  # relevant methods are called (currently it is done from the
1470  # constructor)
1471  protocol = self.all_protocols.get_last_protocol(state)
1472  num_models = protocol.steps[-1].num_models_end
1473  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1474  self.all_protocols.add_postproc(pp, state)
1475  for i in range(rex._number_of_clusters):
1476  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1477  'cluster %d' % (i + 1)))
1478  state.add_model_group(group)
1479  # todo: make # of models to deposit configurable somewhere
1480  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1481  self.system.ensembles.append(e)
1482  # Add localization density info if available
1483  for fname, stuple in sorted(density_custom_ranges.items()):
1484  e.load_localization_density(state, fname, stuple,
1485  self.asym_units)
1486  for stats in e.load_all_models(self, state):
1487  m = self.add_model(group)
1488  # Since we currently only deposit 1 model, it is the
1489  # best scoring one
1490  m.name = 'Best scoring model'
1491  m.stats = stats
1492  # Add RMSF info if available
1493  for c in state.all_modeled_components:
1494  e.load_rmsf(m, c)
1495 
1496  def _get_subassembly(self, comps, name, description):
1497  """Get an Assembly consisting of the given components.
1498  `compdict` is a dictionary of the components to add, where keys
1499  are the component names and values are the sequence ranges (or
1500  None to use all residues in the component)."""
1501  asyms = []
1502  for comp, seqrng in comps.items():
1503  a = self.asym_units[comp]
1504  asyms.append(a if seqrng is None else a(*seqrng))
1505 
1506  a = ihm.Assembly(asyms, name=name, description=description)
1507  return a
1508 
1509  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1510  details):
1511  """Add a basic FoXS fit. This is largely intended for use from the
1512  NPC application."""
1513  assembly = self._get_subassembly({comp:seqrange},
1514  name="SAXS subassembly",
1515  description="All components that fit SAXS data")
1516  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1517  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1518  multi_state=False, radius_of_gyration=rg, details=details)
1519  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1520  self.system.restraints.append(r)
1521  self._add_restraint_dataset(r) # so that all-dataset group works
1522 
1523  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1524  image_resolution, projection_number,
1525  micrographs_number):
1526  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1527  image_resolution, projection_number,
1528  micrographs_number)
1529  self.system.restraints.append(r)
1530  self._add_restraint_dataset(r) # so that all-dataset group works
1531 
1532  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1533  # todo: need to set allow_duplicates on this dataset?
1534  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1535  self.system.restraints.append(r)
1536  self._add_restraint_dataset(r) # so that all-dataset group works
1537 
1538  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1539  sigma, pmi_restraint):
1540  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1541  sigma, pmi_restraint, self._xy_plane)
1542 
1543  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1544  sigma, pmi_restraint):
1545  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1546  sigma, pmi_restraint, self._xz_plane)
1547 
1548  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1549  sigma, pmi_restraint):
1550  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1551  sigma, pmi_restraint, self._z_axis)
1552 
1553  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1554  sigma, pmi_restraint, geom):
1555  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1556  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1557  asym.get_feature(ps),
1558  ihm.restraint.LowerUpperBoundDistanceRestraint(
1559  lower_bound, upper_bound),
1560  sigma)
1561  self.system.restraints.append(r)
1562  self._add_restraint_dataset(r) # so that all-dataset group works
1563 
1564  def _get_membrane(self, tor_R, tor_r, tor_th):
1565  """Get an object representing a half-torus membrane"""
1566  if not hasattr(self, '_seen_membranes'):
1567  self._seen_membranes = {}
1568  # If a membrane already exists with all dimensions within 0.01 of
1569  # this one, reuse it
1570  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1571  if membrane_id not in self._seen_membranes:
1572  m = ihm.geometry.HalfTorus(center=self._center_origin,
1573  transformation=self._identity_transform,
1574  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1575  inner=True, name='Membrane')
1576  self._seen_membranes[membrane_id] = m
1577  return self._seen_membranes[membrane_id]
1578 
1579  def add_membrane_surface_location_restraint(
1580  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1581  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1582  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1583 
1584  def add_membrane_exclusion_restraint(
1585  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1586  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1587  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1588 
1589  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1590  sigma, pmi_restraint, rsr):
1591  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1592  r = _GeometricRestraint(self, state, pmi_restraint,
1593  self._get_membrane(tor_R, tor_r, tor_th),
1594  asym.get_feature(ps), rsr, sigma)
1595  self.system.restraints.append(r)
1596  self._add_restraint_dataset(r) # so that all-dataset group works
1597 
1598  def add_model(self, group, assembly=None, representation=None):
1599  state = self._last_state
1600  if representation is None:
1601  representation = self.default_representation
1602  protocol = self.all_protocols.get_last_protocol(state)
1603  m = _Model(state.prot, self, protocol,
1604  assembly if assembly else state.modeled_assembly,
1605  representation)
1606  group.append(m)
1607  return m
1608 
1609  def _update_locations(self, filelocs):
1610  """Update FileLocation to point to a parent repository, if any"""
1611  all_repos = [m for m in self._metadata
1612  if isinstance(m, ihm.location.Repository)]
1613  for fileloc in filelocs:
1614  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1615 
1616  _metadata = property(lambda self:
1617  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1173
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1302
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1122
A single asymmetric unit in the system.
Definition: /mmcif.py:1098
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1183
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1084
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:731
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1453
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1088
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1106
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1230
3D rotation class.
Definition: Rotation3D.h:51
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1071
VectorD< 3 > Vector3D
Definition: VectorD.h:421
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1110
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...