IMP logo
IMP Reference Guide  2.9.0
The Integrative Modeling Platform
mmcif.py
1 """@namespace IMP.pmi.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi.tools
21 from IMP.pmi.tools import OrderedDict
22 import IMP.pmi.output
23 import IMP.pmi.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper(object):
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
69 
70  def __getitem__(self, p):
71  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
72  return protname
73 
74 
75 class _AsymMapper(object):
76  """Map a Particle to an asym_unit"""
77  def __init__(self, simo, prot):
78  self.simo = simo
79  self._cm = _ComponentMapper(prot)
80 
81  def __getitem__(self, p):
82  protname = self._cm[p]
83  return self.simo.asym_units[protname]
84 
85  def get_feature(self, ps):
86  """Get an ihm.restraint.Feature that covers the given particles"""
87  # todo: handle things other than residues
88  rngs = []
89  for p in ps:
90  asym = self[p]
91  # todo: handle overlapping or adjoining ranges
93  rind = IMP.atom.Residue(p).get_index()
94  rngs.append(asym(rind, rind))
96  # PMI Fragments always contain contiguous residues
97  rinds = IMP.atom.Fragment(p).get_residue_indexes()
98  rngs.append(asym(rinds[0], rinds[-1]))
99  else:
100  raise ValueError("Unsupported particle type %s" % str(p))
101  return ihm.restraint.PolyResidueFeature(rngs)
102 
103 
104 class _AllSoftware(object):
105  def __init__(self, system):
106  self.system = system
107  self.modeller_used = self.phyre2_used = False
108  self.system.software.extend([
109  ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org'),
115  ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')])
121 
122  def set_modeller_used(self, version, date):
123  if self.modeller_used:
124  return
125  self.modeller_used = True
126  self.system.software.append(ihm.Software(
127  name='MODELLER', classification='comparative modeling',
128  description='Comparative modeling by satisfaction '
129  'of spatial restraints, build ' + date,
130  location='https://salilab.org/modeller/',
131  version=version))
132 
133  def set_phyre2_used(self):
134  if self.phyre2_used:
135  return
136  self.phyre2_used = True
137  self.system.software.append(ihm.Software(
138  name='Phyre2', classification='protein homology modeling',
139  description='Protein Homology/analogY Recognition '
140  'Engine V 2.0',
141  version='2.0',
142  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
143 
144 def _get_fragment_is_rigid(fragment):
145  """Determine whether a fragment is modeled rigidly"""
146  leaves = IMP.atom.get_leaves(fragment.hier)
147  # Assume all leaves are set up as rigid/flexible in the same way
148  return IMP.core.RigidMember.get_is_setup(leaves[0])
149 
150 class _PDBFragment(ihm.representation.ResidueSegment):
151  """Record details about part of a PDB file used as input
152  for a component."""
153  def __init__(self, state, component, start, end, pdb_offset,
154  pdbname, chain, hier, asym_unit):
155  # start, end are PMI residue indexes (not IHM)
156  super(_PDBFragment, self).__init__(
157  asym_unit=asym_unit.pmi_range(start, end),
158  rigid=None, primitive='sphere')
159  self.component, self.start, self.end, self.offset, self.pdbname \
160  = component, start, end, pdb_offset, pdbname
161  self.state, self.chain, self.hier = state, chain, hier
164  self.starting_hier = IMP.atom.read_pdb(pdbname, state.m, sel)
165 
166  rigid = property(lambda self: _get_fragment_is_rigid(self),
167  lambda self, val: None)
168 
169  def combine(self, other):
170  pass
171 
172 
173 class _BeadsFragment(ihm.representation.FeatureSegment):
174  """Record details about beads used to represent part of a component."""
175  chain = None
176  def __init__(self, state, component, start, end, count, hier, asym_unit):
177  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
178  rigid=None, primitive='sphere', count=count)
179  self.state, self.component, self.hier \
180  = state, component, hier
181 
182  rigid = property(lambda self: _get_fragment_is_rigid(self),
183  lambda self, val: None)
184 
185  def combine(self, other):
186  # todo: don't combine if one fragment is rigid and the other flexible
187  if type(other) == type(self) and \
188  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
189  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
190  other.asym_unit.seq_id_range[1])
191  self.count += other.count
192  return True
193 
194 
195 class _AllModelRepresentations(object):
196  def __init__(self, simo):
197  self.simo = simo
198  # dict of fragments, ordered by representation, then component name,
199  # then state
200  self.fragments = OrderedDict()
201  self._all_representations = {}
202 
203  def copy_component(self, state, name, original, asym_unit):
204  """Copy all representation for `original` in `state` to `name`"""
205  def copy_frag(f):
206  newf = copy.copy(f)
207  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
208  return newf
209  for rep in self.fragments:
210  if original in self.fragments[rep]:
211  if name not in self.fragments[rep]:
212  self.fragments[rep][name] = OrderedDict()
213  self.fragments[rep][name][state] = [copy_frag(f)
214  for f in self.fragments[rep][original][state]]
215  # Assume representation for a component is the same in all
216  # states, so only write out the first one
217  first_state = list(self.fragments[rep][name].keys())[0]
218  if state is first_state:
219  representation = self._all_representations[rep]
220  representation.extend(self.fragments[rep][name][state])
221 
222  def add_fragment(self, state, representation, fragment):
223  """Add a model fragment."""
224  comp = fragment.component
225  id_rep = id(representation)
226  self._all_representations[id_rep] = representation
227  if id_rep not in self.fragments:
228  self.fragments[id_rep] = OrderedDict()
229  if comp not in self.fragments[id_rep]:
230  self.fragments[id_rep][comp] = OrderedDict()
231  if state not in self.fragments[id_rep][comp]:
232  self.fragments[id_rep][comp][state] = []
233  fragments = self.fragments[id_rep][comp][state]
234  if len(fragments) == 0 or not fragments[-1].combine(fragment):
235  fragments.append(fragment)
236  # Assume representation for a component is the same in all states,
237  # so only write out the first one
238  first_state = list(self.fragments[id_rep][comp].keys())[0]
239  if state is first_state:
240  representation.append(fragment)
241 
242 
243 class _AllDatasets(object):
244  """Track all datasets generated by PMI and add them to the ihm.System"""
245  def __init__(self, system):
246  self.system = system
247  self._datasets = []
248  self._datasets_by_state = {}
249  self._restraints_by_state = {}
250 
251  def get_all_group(self, state):
252  """Get a DatasetGroup encompassing all datasets so far in this state"""
253  # Note that if a restraint dataset is replaced after this is called,
254  # the group will still contain the old dataset - mark dataset as read
255  # only?
256  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
257  + [r.dataset for r in self._restraints_by_state.get(state, [])
258  if r.dataset])
259  return g
260 
261  def add(self, state, dataset):
262  """Add a new dataset."""
263  self._datasets.append(dataset)
264  if state not in self._datasets_by_state:
265  self._datasets_by_state[state] = []
266  self._datasets_by_state[state].append(dataset)
267  # Make sure that the dataset ends up in the mmCIF file even if nothing
268  # refers to it
269  self.system.orphan_datasets.append(dataset)
270  return dataset
271 
272  def add_restraint(self, state, restraint):
273  """Add the dataset for a restraint"""
274  if state not in self._restraints_by_state:
275  self._restraints_by_state[state] = []
276  self._restraints_by_state[state].append(restraint)
277 
278 
279 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
280  """Restrain to a set of cross links"""
281 
282  assembly = None
283  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
284 
285  def __init__(self, pmi_restraint):
286  self.pmi_restraint = pmi_restraint
287  label = self.pmi_restraint.label
288  # Map commonly-used subtypes to more standard labels
289  label = self._label_map.get(label, label)
290  self.label = label
291  super(_CrossLinkRestraint, self).__init__(
292  dataset=self.pmi_restraint.dataset,
293  linker_type=label)
294 
295  def _set_psi_sigma(self, model):
296  old_values = []
297  for resolution in self.pmi_restraint.sigma_dictionary:
298  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
299  if model.stats and statname in model.stats:
300  sigma = float(model.stats[statname])
301  p = self.pmi_restraint.sigma_dictionary[resolution][0]
302  old_values.append((p, p.get_scale()))
303  p.set_scale(sigma)
304  for psiindex in self.pmi_restraint.psi_dictionary:
305  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
306  if model.stats and statname in model.stats:
307  psi = float(model.stats[statname])
308  p = self.pmi_restraint.psi_dictionary[psiindex][0]
309  old_values.append((p, p.get_scale()))
310  p.set_scale(psi)
311  # If the same particle is set multiple times, make sure we get the
312  # original value last
313  return list(reversed(old_values))
314 
315  def add_fits_from_model_statfile(self, model):
316  # Set psi/sigma for all particles
317  old_values = self._set_psi_sigma(model)
318  # If no stats were collected, we can't show the fit
319  if not old_values:
320  return
321  for xl in self.cross_links:
322  # Get current psi/sigma particle value for each XL
323  xl.fits[model] = ihm.restraint.CrossLinkFit(
324  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
325  # Restore original psi/sigma
326  for p, scale in old_values:
327  p.set_scale(scale)
328 
329  # Have our dataset point to that in the original PMI restraint
330  def __set_dataset(self, val):
331  self.pmi_restraint.dataset = val
332  dataset = property(lambda self: self.pmi_restraint.dataset,
333  __set_dataset)
334 
335 
336 def get_asym_mapper_for_state(simo, state, asym_map):
337  asym = asym_map.get(state, None)
338  if asym is None:
339  asym = _AsymMapper(simo, state.prot)
340  asym_map[state] = asym
341  return asym
342 
343 class _PMICrossLink(object):
344  # Query PMI particles to get psi and sigma values at time of use, not
345  # time of construction (since they may be set after creating the restraint)
346  psi = property(lambda self: self.psi_p.get_scale(),
347  lambda self, val: None)
348  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
349  lambda self, val: None)
350  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
351  lambda self, val: None)
352 
353 
354 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
355  pass
356 
357 
358 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
359  pass
360 
361 
362 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
363  def __init__(self, state, pmi_restraint, image_number, resolution,
364  pixel_size, image_resolution, projection_number,
365  micrographs_number):
366  self.pmi_restraint, self.image_number = pmi_restraint, image_number
367  super(_EM2DRestraint, self).__init__(
368  dataset=pmi_restraint.datasets[image_number],
369  assembly=state.modeled_assembly,
370  segment=False, number_raw_micrographs=micrographs_number,
371  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
372  image_resolution=image_resolution,
373  number_of_projections=projection_number)
374 
375  # Have our dataset point to that in the original PMI restraint
376  def __get_dataset(self):
377  return self.pmi_restraint.datasets[self.image_number]
378  def __set_dataset(self, val):
379  self.pmi_restraint.datasets[self.image_number] = val
380  dataset = property(__get_dataset, __set_dataset)
381 
382  def add_fits_from_model_statfile(self, model):
383  ccc = self._get_cross_correlation(model)
384  transform = self._get_transformation(model)
385  rot = transform.get_rotation()
386  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
387  self.fits[model] = ihm.restraint.EM2DRestraintFit(
388  cross_correlation_coefficient=ccc,
389  rot_matrix=rm,
390  tr_vector=transform.get_translation())
391 
392  def _get_transformation(self, model):
393  """Get the transformation that places the model on the image"""
394  stats = model.em2d_stats or model.stats
395  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
396  self.image_number + 1)
397  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
398  t = [float(stats[prefix + '_Translation%d' % i])
399  for i in range(3)]
400  # If the model coordinates are transformed, need to back transform
401  # them first
402  inv = model.transform.get_inverse()
404  IMP.algebra.Vector3D(*t)) * inv
405 
406  def _get_cross_correlation(self, model):
407  """Get the cross correlation coefficient between the model projection
408  and the image"""
409  stats = model.em2d_stats or model.stats
410  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
411  % (self.pmi_restraint.label,
412  self.image_number + 1)])
413 
414 
415 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
416 
417  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
418  self.pmi_restraint = pmi_restraint
419  super(_EM3DRestraint, self).__init__(
420  dataset=pmi_restraint.dataset,
421  assembly=self._get_assembly(densities, simo, state),
422  fitting_method='Gaussian mixture models',
423  number_of_gaussians=len(target_ps))
424 
425  # Have our dataset point to that in the original PMI restraint
426  def __set_dataset(self, val):
427  self.pmi_restraint.dataset = val
428  dataset = property(lambda self: self.pmi_restraint.dataset,
429  __set_dataset)
430 
431  def _get_assembly(self, densities, simo, state):
432  """Get the Assembly that this restraint acts on"""
433  cm = _ComponentMapper(state.prot)
434  components = {}
435  for d in densities:
436  components[cm[d]] = None # None == all residues in this component
437  a = simo._get_subassembly(components,
438  name="EM subassembly",
439  description="All components that fit the EM map")
440  return a
441 
442  def add_fits_from_model_statfile(self, model):
443  ccc = self._get_cross_correlation(model)
444  self.fits[model] = ihm.restraint.EM3DRestraintFit(
445  cross_correlation_coefficient=ccc)
446 
447  def _get_cross_correlation(self, model):
448  """Get the cross correlation coefficient between the model
449  and the map"""
450  if model.stats is not None:
451  return float(model.stats['GaussianEMRestraint_%s_CCC'
452  % self.pmi_restraint.label])
453 
454 
455 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
456 
457  def __init__(self, simo, state, pmi_restraint, geometric_object,
458  feature, distance, sigma):
459  self.pmi_restraint = pmi_restraint
460  super(_GeometricRestraint, self).__init__(
461  dataset=pmi_restraint.dataset,
462  geometric_object=geometric_object, feature=feature,
463  distance=distance, harmonic_force_constant = 1. / sigma,
464  restrain_all=True)
465 
466  # Have our dataset point to that in the original PMI restraint
467  def __set_dataset(self, val):
468  self.pmi_restraint.dataset = val
469  dataset = property(lambda self: self.pmi_restraint.dataset,
470  __set_dataset)
471 
472 
473 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
474  def __init__(self, state, rex):
475  if rex.monte_carlo_sample_objects is not None:
476  method = 'Replica exchange monte carlo'
477  else:
478  method = 'Replica exchange molecular dynamics'
479  super(_ReplicaExchangeProtocolStep, self).__init__(
480  assembly=state.modeled_assembly,
481  dataset_group=None, # filled in by add_step()
482  method=method, name='Sampling',
483  num_models_begin=None, # filled in by add_step()
484  num_models_end=rex.vars["number_of_frames"],
485  multi_scale=True, multi_state=False, ordered=False)
486 
487 
488 class _SimpleProtocolStep(ihm.protocol.Step):
489  def __init__(self, state, num_models_end, method):
490  super(_SimpleProtocolStep, self).__init__(
491  assembly=state.modeled_assembly,
492  dataset_group=None, # filled in by add_step()
493  method=method, name='Sampling',
494  num_models_begin=None, # filled in by add_step()
495  num_models_end=num_models_end,
496  multi_scale=True, multi_state=False, ordered=False)
497 
498 
499 class _Chain(object):
500  """Represent a single chain in a Model"""
501  def __init__(self, pmi_chain_id, asym_unit):
502  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
503  self.spheres = []
504  self.atoms = []
505 
506  def add(self, xyz, atom_type, residue_type, residue_index,
507  all_indexes, radius):
508  if atom_type is None:
509  self.spheres.append((xyz, residue_type, residue_index,
510  all_indexes, radius))
511  else:
512  self.atoms.append((xyz, atom_type, residue_type, residue_index,
513  all_indexes, radius))
514  orig_comp = property(lambda self: self.comp)
515 
516 class _TransformedChain(object):
517  """Represent a chain that is a transformed version of another"""
518  def __init__(self, orig_chain, asym_unit, transform):
519  self.orig_chain, self.asym_unit = orig_chain, asym_unit
520  self.transform = transform
521 
522  def __get_spheres(self):
523  for (xyz, residue_type, residue_index, all_indexes,
524  radius) in self.orig_chain.spheres:
525  yield (self.transform * xyz, residue_type, residue_index,
526  all_indexes, radius)
527  spheres = property(__get_spheres)
528 
529  def __get_atoms(self):
530  for (xyz, atom_type, residue_type, residue_index, all_indexes,
531  radius) in self.orig_chain.atoms:
532  yield (self.transform * xyz, atom_type, residue_type, residue_index,
533  all_indexes, radius)
534  atoms = property(__get_atoms)
535 
536  entity = property(lambda self: self.orig_chain.entity)
537  orig_comp = property(lambda self: self.orig_chain.comp)
538 
539 class _Excluder(object):
540  def __init__(self, component, simo):
541  self._seqranges = simo._exclude_coords.get(component, [])
542 
543  def is_excluded(self, indexes):
544  """Return True iff the given sequence range is excluded."""
545  for seqrange in self._seqranges:
546  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
547  return True
548 
549 
550 class _Model(ihm.model.Model):
551  def __init__(self, prot, simo, protocol, assembly, representation):
552  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
553  representation=representation)
554  self.simo = weakref.proxy(simo)
555  # Transformation from IMP coordinates into mmCIF coordinate system.
556  # Normally we pass through coordinates unchanged, but in some cases
557  # we may want to translate them (e.g. Nup84, where the deposited PDB
558  # files are all centered; we want the mmCIF files to match)
560  self.em2d_stats = None
561  self.stats = None
562  # True iff restraints act on this model
563  self._is_restrained = True
564  o = self.output = IMP.pmi.output.Output(atomistic=True)
565  name = 'cif-output'
566  o.dictionary_pdbs[name] = prot
567  o._init_dictchain(name, prot, multichar_chain=True)
568  (particle_infos_for_pdb,
569  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
570  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
571  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
572  self.rmsf = {}
573 
574  def all_chains(self, simo):
575  """Yield all chains, including transformed ones"""
576  chain_for_comp = {}
577  for c in self.chains:
578  yield c
579  chain_for_comp[c.comp] = c
580  for tc in simo._transformed_components:
581  orig_chain = chain_for_comp.get(tc.original, None)
582  if orig_chain:
583  asym = simo.asym_units[tc.name]
584  c = _TransformedChain(orig_chain, asym, tc.transform)
585  c.comp = tc.name
586  yield c
587 
588  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
589  entity_for_chain = {}
590  comp_for_chain = {}
591  correct_asym = {}
592  for protname, chain_id in o.dictchain[name].items():
593  entity_for_chain[chain_id] = simo.entities[protname]
594  comp_for_chain[chain_id] = protname
595  # When doing multi-state modeling, the chain ID returned here
596  # (assigned sequentially) might not be correct (states may have
597  # gaps in the chain IDs). Map it to the correct asym unit.
598  correct_asym[chain_id] = simo.asym_units[protname]
599 
600  # Gather by chain ID (should be sorted by chain ID already)
601  self.chains = []
602  chain = None
603  excluder = None
604 
605  for (xyz, atom_type, residue_type, chain_id, residue_index,
606  all_indexes, radius) in particle_infos_for_pdb:
607  if chain is None or chain.pmi_chain_id != chain_id:
608  chain = _Chain(chain_id, correct_asym[chain_id])
609  chain.entity = entity_for_chain[chain_id]
610  chain.comp = comp_for_chain[chain_id]
611  self.chains.append(chain)
612  excluder = _Excluder(chain.comp, simo)
613  if not excluder.is_excluded(all_indexes if all_indexes
614  else [residue_index]):
615  chain.add(xyz, atom_type, residue_type, residue_index,
616  all_indexes, radius)
617 
618  def parse_rmsf_file(self, fname, component):
619  self.rmsf[component] = rmsf = {}
620  with open(fname) as fh:
621  for line in fh:
622  resnum, blocknum, val = line.split()
623  rmsf[int(resnum)] = (int(blocknum), float(val))
624 
625  def get_rmsf(self, component, indexes):
626  """Get the RMSF value for the given residue indexes."""
627  if not self.rmsf:
628  return None
629  rmsf = self.rmsf[component]
630  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
631  if len(blocknums) != 1:
632  raise ValueError("Residue indexes %s aren't all in the same block"
633  % str(indexes))
634  return rmsf[indexes[0]][1]
635 
636  def get_atoms(self):
637  for chain in self.all_chains(self.simo):
638  pmi_offset = chain.asym_unit.entity.pmi_offset
639  for atom in chain.atoms:
640  (xyz, atom_type, residue_type, residue_index,
641  all_indexes, radius) = atom
642  pt = self.transform * xyz
643  yield ihm.model.Atom(asym_unit=chain.asym_unit,
644  seq_id=residue_index - pmi_offset,
645  atom_id=atom_type.get_string(),
646  type_symbol=None, # todo: get element
647  x=pt[0], y=pt[1], z=pt[2])
648 
649  def get_spheres(self):
650  for chain in self.all_chains(self.simo):
651  pmi_offset = chain.asym_unit.entity.pmi_offset
652  for sphere in chain.spheres:
653  (xyz, residue_type, residue_index,
654  all_indexes, radius) = sphere
655  if all_indexes is None:
656  all_indexes = (residue_index,)
657  pt = self.transform * xyz
658  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
659  seq_id_range=(all_indexes[0] - pmi_offset,
660  all_indexes[-1] - pmi_offset),
661  x=pt[0], y=pt[1], z=pt[2], radius=radius,
662  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
663 
664 
665 class _AllProtocols(object):
666  def __init__(self, simo):
667  self.simo = weakref.proxy(simo)
668  # Lists of Protocols by state
669  self.protocols = OrderedDict()
670 
671  def add_protocol(self, state):
672  """Add a new Protocol"""
673  if state not in self.protocols:
674  self.protocols[state] = []
675  p = ihm.protocol.Protocol()
676  self.simo.system.orphan_protocols.append(p)
677  self.protocols[state].append(p)
678 
679  def add_step(self, step, state):
680  """Add a ProtocolStep to the last Protocol of the given State"""
681  if state not in self.protocols:
682  self.add_protocol(state)
683  protocol = self.get_last_protocol(state)
684  if len(protocol.steps) == 0:
685  step.num_models_begin = 0
686  else:
687  step.num_models_begin = protocol.steps[-1].num_models_end
688  protocol.steps.append(step)
689  step.id = len(protocol.steps)
690  # Assume that protocol uses all currently-defined datasets
691  step.dataset_group = self.simo.all_datasets.get_all_group(state)
692 
693  def add_postproc(self, step, state):
694  """Add a postprocessing step to the last protocol"""
695  protocol = self.get_last_protocol(state)
696  if len(protocol.analyses) == 0:
697  protocol.analyses.append(ihm.analysis.Analysis())
698  protocol.analyses[-1].steps.append(step)
699 
700  def get_last_protocol(self, state):
701  """Return the most recently-added _Protocol"""
702  return self.protocols[state][-1]
703 
704 
705 class _AllStartingModels(object):
706  def __init__(self, simo):
707  self.simo = simo
708  # dict of starting models (entire PDB files), collected from fragments,
709  # ordered by component name and state
710  self.models = OrderedDict()
711  self.output = IMP.pmi.output.Output()
712 
713  def add_pdb_fragment(self, fragment):
714  """Add a starting model PDB fragment."""
715  comp = fragment.component
716  state = fragment.state
717  if comp not in self.models:
718  self.models[comp] = OrderedDict()
719  if state not in self.models[comp]:
720  self.models[comp][state] = []
721  models = self.models[comp][state]
722  if len(models) == 0 \
723  or models[-1].fragments[0].pdbname != fragment.pdbname:
724  model = self._add_model(fragment)
725  models.append(model)
726  else:
727  # Break circular ref between fragment and model
728  models[-1].fragments.append(weakref.proxy(fragment))
729  # Update residue range to cover all fragments
730  pmi_offset = models[-1].asym_unit.entity.pmi_offset
731  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
732  models[-1].asym_unit.seq_id_range[0])
733  sid_end = max(fragment.end + fragment.offset - pmi_offset,
734  models[-1].asym_unit.seq_id_range[1])
735  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
736  fragment.starting_model = models[-1]
737 
738  def _add_model(self, f):
739  parser = ihm.metadata.PDBParser()
740  r = parser.parse_file(f.pdbname)
741 
742  self.simo._add_dataset(r['dataset'])
743  # We only want the templates that model the starting model chain
744  templates = r['templates'].get(f.chain, [])
745  for t in templates:
746  if t.alignment_file:
747  self.simo.system.locations.append(t.alignment_file)
748  if t.dataset:
749  self.simo._add_dataset(t.dataset)
750  pmi_offset = f.asym_unit.entity.pmi_offset
751  m = _StartingModel(
752  asym_unit=f.asym_unit.asym.pmi_range(f.start + f.offset,
753  f.end + f.offset),
754  dataset=r['dataset'], asym_id=f.chain,
755  templates=templates, offset=f.offset + pmi_offset,
756  metadata=r['metadata'],
757  software=r['software'][0] if r['software'] else None,
758  script_file=r['script'])
759  m.fragments = [weakref.proxy(f)]
760  return m
761 
762 
763 class _StartingModel(ihm.startmodel.StartingModel):
764  def get_seq_dif(self):
765  return self._seq_dif
766 
767  def get_atoms(self):
768  pmi_offset = self.asym_unit.entity.pmi_offset
769  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
770  self.asym_unit)
771  for f in self.fragments:
772  sel = IMP.atom.Selection(f.starting_hier,
773  residue_indexes=list(range(f.start - f.offset,
774  f.end - f.offset + 1)))
775  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
776  f.offset - pmi_offset):
777  yield a
778  self._seq_dif = mh._seq_dif
779 
780 
781 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
782  """Post processing using AnalysisReplicaExchange0 macro"""
783 
784  def __init__(self, rex, num_models_begin):
785  self.rex = rex
786  num_models_end = 0
787  for fname in self.get_all_stat_files():
788  with open(fname) as fh:
789  num_models_end += len(fh.readlines())
790  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
791  feature='RMSD', num_models_begin=num_models_begin,
792  num_models_end=num_models_end)
793 
794  def get_stat_file(self, cluster_num):
795  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
796  'stat.out')
797 
798  def get_all_stat_files(self):
799  for i in range(self.rex._number_of_clusters):
800  yield self.get_stat_file(i)
801 
802 
803 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
804  """Ensemble generated using AnalysisReplicaExchange0 macro"""
805 
806  num_models_deposited = None # Override base class property
807 
808  def __init__(self, pp, cluster_num, model_group, num_deposit):
809  with open(pp.get_stat_file(cluster_num)) as fh:
810  num_models = len(fh.readlines())
811  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
812  num_models=num_models,
813  model_group=model_group, post_process=pp,
814  clustering_feature=pp.feature,
815  name=model_group.name)
816  self.cluster_num = cluster_num
817  self.num_models_deposited = num_deposit
818 
819  def get_rmsf_file(self, component):
820  return os.path.join(self.post_process.rex._outputdir,
821  'cluster.%d' % self.cluster_num,
822  'rmsf.%s.dat' % component)
823 
824  def load_rmsf(self, model, component):
825  fname = self.get_rmsf_file(component)
826  if os.path.exists(fname):
827  model.parse_rmsf_file(fname, component)
828 
829  def get_localization_density_file(self, fname):
830  return os.path.join(self.post_process.rex._outputdir,
831  'cluster.%d' % self.cluster_num,
832  '%s.mrc' % fname)
833 
834  def load_localization_density(self, state, fname, select_tuple, asym_units):
835  fullpath = self.get_localization_density_file(fname)
836  if os.path.exists(fullpath):
837  details = "Localization density for %s %s" \
838  % (fname, self.model_group.name)
839  local_file = ihm.location.OutputFileLocation(fullpath,
840  details=details)
841  for s in select_tuple:
842  if isinstance(s, tuple) and len(s) == 3:
843  asym = asym_units[s[2]].pmi_range(s[0], s[1])
844  else:
845  asym = asym_units[s]
846  den = ihm.model.LocalizationDensity(file=local_file,
847  asym_unit=asym)
848  self.densities.append(den)
849 
850  def load_all_models(self, simo, state):
851  stat_fname = self.post_process.get_stat_file(self.cluster_num)
852  model_num = 0
853  with open(stat_fname) as fh:
854  stats = ast.literal_eval(fh.readline())
855  # Correct path
856  rmf_file = os.path.join(os.path.dirname(stat_fname),
857  "%d.rmf3" % model_num)
858  for c in state.all_modeled_components:
859  # todo: this only works with PMI 1
860  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
861  force_rigid_update=True)
862  # todo: fill in other data from stat file, e.g. crosslink phi/psi
863  yield stats
864  model_num += 1
865  if model_num >= self.num_models_deposited:
866  return
867 
868  # todo: also support dRMS precision
869  def _get_precision(self):
870  precfile = os.path.join(self.post_process.rex._outputdir,
871  "precision.%d.%d.out" % (self.cluster_num,
872  self.cluster_num))
873  if not os.path.exists(precfile):
874  return ihm.unknown
875  # Fail if the precision.x.x.out file doesn't match the cluster
876  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
877  % self.cluster_num)
878  with open(precfile) as fh:
879  for line in fh:
880  m = r.match(line)
881  if m:
882  return float(m.group(1))
883 
884  precision = property(lambda self: self._get_precision(),
885  lambda self, val: None)
886 
887 class _SimpleEnsemble(ihm.model.Ensemble):
888  """Simple manually-created ensemble"""
889 
890  num_models_deposited = None # Override base class property
891 
892  def __init__(self, pp, model_group, num_models, drmsd,
893  num_models_deposited, ensemble_file):
894  super(_SimpleEnsemble, self).__init__(
895  model_group=model_group, post_process=pp, num_models=num_models,
896  file=ensemble_file, precision=drmsd, name=model_group.name,
897  clustering_feature='dRMSD')
898  self.num_models_deposited = num_models_deposited
899 
900  def load_localization_density(self, state, component, asym, local_file):
901  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
902  self.densities.append(den)
903 
904 
905 class _EntityMapper(dict):
906  """Handle mapping from IMP components to CIF entities.
907  Multiple components may map to the same entity if they share sequence."""
908  def __init__(self, system):
909  super(_EntityMapper, self).__init__()
910  self._sequence_dict = {}
911  self._entities = []
912  self.system = system
913 
914  def add(self, component_name, sequence, offset):
915  def entity_seq(sequence):
916  # Map X to UNK
917  if 'X' in sequence:
918  return ['UNK' if s == 'X' else s for s in sequence]
919  else:
920  return sequence
921  if sequence not in self._sequence_dict:
922  # Use the name of the first component, stripped of any copy number,
923  # as the description of the entity
924  d = component_name.split("@")[0].split(".")[0]
925  entity = Entity(entity_seq(sequence), description=d,
926  pmi_offset=offset)
927  self.system.entities.append(entity)
928  self._sequence_dict[sequence] = entity
929  self[component_name] = self._sequence_dict[sequence]
930 
931 
932 class _TransformedComponent(object):
933  def __init__(self, name, original, transform):
934  self.name, self.original, self.transform = name, original, transform
935 
936 
937 class _State(ihm.model.State):
938  """Representation of a single state in the system."""
939 
940  def __init__(self, pmi_object, po):
941  # Point to the PMI object for this state. Use a weak reference
942  # since the state object typically points to us too, so we need
943  # to break the reference cycle. In PMI1 this will be a
944  # Representation object.
945  self._pmi_object = weakref.proxy(pmi_object)
946  self._pmi_state = pmi_object.state
947  # Preserve PMI state name
948  old_name = self.name
949  super(_State, self).__init__(experiment_type='Fraction of bulk')
950  self.name = old_name
951 
952  # The assembly of all components modeled by IMP in this state.
953  # This may be smaller than the complete assembly.
954  self.modeled_assembly = ihm.Assembly(
955  name="Modeled assembly",
956  description=self.get_postfixed_name(
957  "All components modeled by IMP"))
958  po.system.orphan_assemblies.append(self.modeled_assembly)
959 
960  self.all_modeled_components = []
961 
962  def __hash__(self):
963  return hash(self._pmi_state)
964  def __eq__(self, other):
965  return self._pmi_state == other._pmi_state
966 
967  def add_model_group(self, group):
968  self.append(group)
969  return group
970 
971  def get_prefixed_name(self, name):
972  """Prefix the given name with the state name, if available."""
973  if self.short_name:
974  return self.short_name + ' ' + name
975  else:
976  # Can't use capitalize() since that un-capitalizes everything
977  # but the first letter
978  return name[0].upper() + name[1:] if name else ''
979 
980  def get_postfixed_name(self, name):
981  """Postfix the given name with the state name, if available."""
982  if self.short_name:
983  return "%s in state %s" % (name, self.short_name)
984  else:
985  return name
986 
987  short_name = property(lambda self: self._pmi_state.short_name)
988  long_name = property(lambda self: self._pmi_state.long_name)
989  def __get_name(self):
990  return self._pmi_state.long_name
991  def __set_name(self, val):
992  self._pmi_state.long_name = val
993  name = property(__get_name, __set_name)
994 
995 
996 class Entity(ihm.Entity):
997  """A single entity in the system.
998  This functions identically to the base ihm.Entity class, but it
999  allows identifying residues by either the IHM numbering scheme
1000  (seq_id, which is always contiguous starting at 1) or by the PMI
1001  scheme (which is similar, but may not start at 1 if the sequence in
1002  the FASTA file has one or more N-terminal gaps"""
1003  def __init__(self, sequence, pmi_offset, *args, **keys):
1004  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1005  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1006  self.pmi_offset = pmi_offset
1007  super(Entity, self).__init__(sequence, *args, **keys)
1008 
1009  def pmi_residue(self, res_id):
1010  """Return a single IHM residue indexed using PMI numbering"""
1011  return self.residue(res_id - self.pmi_offset)
1012 
1013  def pmi_range(self, res_id_begin, res_id_end):
1014  """Return a range of IHM residues indexed using PMI numbering"""
1015  off = self.pmi_offset
1016  return self(res_id_begin - off, res_id_end - off)
1017 
1018 
1019 class AsymUnit(ihm.AsymUnit):
1020  """A single asymmetric unit in the system.
1021  This functions identically to the base ihm.AsymUnit class, but it
1022  allows identifying residues by either the IHM numbering scheme
1023  (seq_id, which is always contiguous starting at 1) or by the PMI
1024  scheme (which is similar, but may not start at 1 if the sequence in
1025  the FASTA file has one or more N-terminal gaps"""
1026 
1027  def __init__(self, entity, *args, **keys):
1028  super(AsymUnit, self).__init__(
1029  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1030 
1031  def pmi_residue(self, res_id):
1032  """Return a single IHM residue indexed using PMI numbering"""
1033  return self.residue(res_id - self.entity.pmi_offset)
1034 
1035  def pmi_range(self, res_id_begin, res_id_end):
1036  """Return a range of IHM residues indexed using PMI numbering"""
1037  off = self.entity.pmi_offset
1038  return self(res_id_begin - off, res_id_end - off)
1039 
1040 
1042  """Class to encode a modeling protocol as mmCIF.
1043 
1044  IMP has basic support for writing out files in mmCIF format, for
1045  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1046  After creating an instance of this class, attach it to an
1047  IMP.pmi.representation.Representation object. After this, any
1048  generated models and metadata are automatically collected and
1049  output as mmCIF.
1050  """
1051  def __init__(self, fh):
1052  # Ultimately, collect data in an ihm.System object
1053  self.system = ihm.System()
1054  self._state_group = ihm.model.StateGroup()
1055  self.system.state_groups.append(self._state_group)
1056 
1057  self.fh = fh
1058  self._state_ensemble_offset = 0
1059  self._each_metadata = [] # list of metadata for each representation
1060  self._file_datasets = []
1061  self._main_script = os.path.abspath(sys.argv[0])
1062 
1063  # Point to the main modeling script
1064  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1065  details="The main integrative modeling script")
1066  self.system.locations.append(loc)
1067 
1068  self._states = {}
1069  self.__asym_states = {}
1070  self._working_directory = os.getcwd()
1071  self.default_representation = self.create_representation(
1072  "Default representation")
1073  self.entities = _EntityMapper(self.system)
1074  # Mapping from component names to ihm.AsymUnit
1075  self.asym_units = {}
1076  self._all_components = {}
1077  self.all_modeled_components = []
1078  self._transformed_components = []
1079  self.sequence_dict = {}
1080 
1081  # Common geometric objects used in PMI systems
1082  self._xy_plane = ihm.geometry.XYPlane()
1083  self._center_origin = ihm.geometry.Center(0,0,0)
1084  self._identity_transform = ihm.geometry.Transformation.identity()
1085 
1086  # Coordinates to exclude
1087  self._exclude_coords = {}
1088 
1089  self.all_representations = _AllModelRepresentations(self)
1090  self.all_protocols = _AllProtocols(self)
1091  self.all_datasets = _AllDatasets(self.system)
1092  self.all_starting_models = _AllStartingModels(self)
1093 
1094  self.all_software = _AllSoftware(self.system)
1095 
1096  def create_representation(self, name):
1097  """Create a new Representation and return it. This can be
1098  passed to add_model(), add_bead_element() or add_pdb_element()."""
1099  r = ihm.representation.Representation()
1100  self.system.orphan_representations.append(r)
1101  return r
1102 
1103  def exclude_coordinates(self, component, seqrange):
1104  """Don't record coordinates for the given domain.
1105  Coordinates for the given domain (specified by a component name
1106  and a 2-element tuple giving the start and end residue numbers)
1107  will be excluded from the mmCIF file. This can be used to exclude
1108  parts of the structure that weren't well resolved in modeling.
1109  Any bead or residue that lies wholly within this range will be
1110  excluded. Multiple ranges for a given component can be excluded
1111  by calling this method multiple times."""
1112  if component not in self._exclude_coords:
1113  self._exclude_coords[component] = []
1114  self._exclude_coords[component].append(seqrange)
1115 
1116  def _is_excluded(self, component, start, end):
1117  """Return True iff this chunk of sequence should be excluded"""
1118  for seqrange in self._exclude_coords.get(component, ()):
1119  if start >= seqrange[0] and end <= seqrange[1]:
1120  return True
1121 
1122  def _add_state(self, state):
1123  """Create a new state and return a pointer to it."""
1124  self._state_ensemble_offset = len(self.system.ensembles)
1125  s = _State(state, self)
1126  self._state_group.append(s)
1127  self._last_state = s
1128  return s
1129 
1130  def get_file_dataset(self, fname):
1131  for d in self._file_datasets:
1132  fd = d.get(os.path.abspath(fname), None)
1133  if fd:
1134  return fd
1135 
1136  def _get_chain_for_component(self, name, output):
1137  """Get the chain ID for a component, if any."""
1138  # todo: handle multiple copies
1139  if name in self.asym_units:
1140  return self.asym_units[name]._id
1141  else:
1142  # A non-modeled component doesn't have a chain ID
1143  return None
1144 
1145  def _get_assembly_comps(self, assembly):
1146  """Get the names of the components in the given assembly"""
1147  # component name = asym_unit.details
1148  comps = {}
1149  for ca in assembly:
1150  comps[ca.details] = None
1151  return comps
1152 
1153  def create_transformed_component(self, state, name, original, transform):
1154  """Make a new component that's a transformed copy of another.
1155  All representation for the existing component is copied to the
1156  new one."""
1157  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1158  if name in assembly_comps:
1159  raise ValueError("Component %s already exists" % name)
1160  elif original not in assembly_comps:
1161  raise ValueError("Original component %s does not exist" % original)
1162  self.create_component(state, name, True)
1163  self.add_component_sequence(state, name, self.sequence_dict[original])
1164  self._transformed_components.append(_TransformedComponent(
1165  name, original, transform))
1166  self.all_representations.copy_component(state, name, original,
1167  self.asym_units[name])
1168 
1169  def create_component(self, state, name, modeled):
1170  new_comp = name not in self._all_components
1171  self._all_components[name] = None
1172  if modeled:
1173  state.all_modeled_components.append(name)
1174  if new_comp:
1175  self.asym_units[name] = None # assign asym once we get sequence
1176  self.all_modeled_components.append(name)
1177 
1178  def add_component_sequence(self, state, name, seq):
1179  def get_offset(seq):
1180  # Count length of gaps at start of sequence
1181  for i in range(len(seq)):
1182  if seq[i] != '-':
1183  return seq[i:], i
1184  seq, offset = get_offset(seq)
1185  if name in self.sequence_dict:
1186  if self.sequence_dict[name] != seq:
1187  raise ValueError("Sequence mismatch for component %s" % name)
1188  else:
1189  self.sequence_dict[name] = seq
1190  self.entities.add(name, seq, offset)
1191  if name in self.asym_units:
1192  if self.asym_units[name] is None:
1193  # Set up a new asymmetric unit for this component
1194  entity = self.entities[name]
1195  asym = AsymUnit(entity, details=name)
1196  self.system.asym_units.append(asym)
1197  self.asym_units[name] = asym
1198  state.modeled_assembly.append(self.asym_units[name])
1199 
1200  def _add_restraint_model_fits(self):
1201  """Add fits to restraints for all known models"""
1202  for group, m in self.system._all_models():
1203  if m._is_restrained:
1204  for r in self.system.restraints:
1205  if hasattr(r, 'add_fits_from_model_statfile'):
1206  r.add_fits_from_model_statfile(m)
1207 
1208  def _add_em2d_raw_micrographs(self):
1209  """If the deprecated metadata.EMMicrographsDataset class was used,
1210  transfer its data to the EM2D restraint."""
1211  for r in self.system.restraints:
1212  if isinstance(r, _EM2DRestraint):
1213  for d in r.dataset.parents:
1214  if isinstance(d, IMP.pmi.metadata.EMMicrographsDataset):
1215  r.number_raw_micrographs = d.number
1216  if isinstance(d.location,
1217  IMP.pmi.metadata.FileLocation):
1218  d.location.content_type = \
1219  ihm.location.InputFileLocation.content_type
1220 
1221  def flush(self):
1222  self.finalize()
1223  ihm.dumper.write(self.fh, [self.system])
1224 
1225  def finalize(self):
1226  """Do any final processing on the class hierarchy.
1227  After calling this method, the `system` member (an instance
1228  of `ihm.System`) completely reproduces the PMI modeling, and
1229  can be written out to an mmCIF file with `ihm.dumper.write`,
1230  and/or modified using the ihm API."""
1231  self._add_restraint_model_fits()
1232 
1233  self._add_em2d_raw_micrographs()
1234 
1235  # Add metadata to ihm.System
1236  self.system.software.extend(m for m in self._metadata
1237  if isinstance(m, ihm.Software))
1238  self.system.citations.extend(m for m in self._metadata
1239  if isinstance(m, ihm.Citation))
1240  self.system.locations.extend(m for m in self._metadata
1241  if isinstance(m, ihm.location.FileLocation))
1242 
1243  # Point all locations to repos, if applicable
1244  all_repos = [m for m in self._metadata
1245  if isinstance(m, ihm.location.Repository)]
1246  self.system.update_locations_in_repositories(all_repos)
1247 
1248  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1249  chain, hier, representation=None):
1250  if self._is_excluded(name, start, end):
1251  return
1252  if representation is None:
1253  representation = self.default_representation
1254  asym = self.asym_units[name]
1255  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1256  hier, asym)
1257  self.all_representations.add_fragment(state, representation, p)
1258  self.all_starting_models.add_pdb_fragment(p)
1259 
1260  def add_bead_element(self, state, name, start, end, num, hier,
1261  representation=None):
1262  if self._is_excluded(name, start, end):
1263  return
1264  if representation is None:
1265  representation = self.default_representation
1266  asym = self.asym_units[name]
1267  pmi_offset = asym.entity.pmi_offset
1268  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1269  num, hier, asym)
1270  self.all_representations.add_fragment(state, representation, b)
1271 
1272  def get_cross_link_group(self, pmi_restraint):
1273  r = _CrossLinkRestraint(pmi_restraint)
1274  self.system.restraints.append(r)
1275  self._add_restraint_dataset(r) # so that all-dataset group works
1276  return r
1277 
1278  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1279  if c1 not in self._all_components or c2 not in self._all_components:
1280  # Crosslink refers to a component we didn't model
1281  # As a quick hack, just ignore it.
1282  # todo: need to add an entity for this anyway (so will need the
1283  # sequence, and add to struct_assembly)
1284  return None
1285  e1 = self.entities[c1]
1286  e2 = self.entities[c2]
1287  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1288  residue2=e2.pmi_residue(r2))
1289  rsr.experimental_cross_links.append([xl])
1290  return xl
1291 
1292  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1293  psi_p, rsr):
1294  # Map p1/p2 to asym units
1295  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1296  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1297 
1298  if _get_by_residue(p1) and _get_by_residue(p2):
1299  cls = _ResidueCrossLink
1300  else:
1301  cls = _FeatureCrossLink
1302  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1303  restrain_all=True)
1304  # Needed to look up fits later
1305  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1306  rsr.cross_links.append(xl)
1307 
1308  def add_replica_exchange(self, state, rex):
1309  # todo: allow for metadata to say how many replicas were used in the
1310  # actual experiment, and how many independent runs were carried out
1311  # (use these as multipliers to get the correct total number of
1312  # output models)
1313  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1314  state)
1315 
1316  def _add_simple_dynamics(self, num_models_end, method):
1317  # Always assumed that we're dealing with the last state
1318  state = self._last_state
1319  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1320  method), state)
1321 
1322  def _add_protocol(self):
1323  # Always assumed that we're dealing with the last state
1324  state = self._last_state
1325  self.all_protocols.add_protocol(state)
1326 
1327  def _add_dataset(self, dataset):
1328  return self.all_datasets.add(self._last_state, dataset)
1329 
1330  def _add_restraint_dataset(self, restraint):
1331  return self.all_datasets.add_restraint(self._last_state, restraint)
1332 
1333  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1334  # Always assumed that we're dealing with the last state
1335  state = self._last_state
1336  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1337  self.all_protocols.add_postproc(pp, state)
1338  return pp
1339 
1340  def _add_no_postprocessing(self, num_models):
1341  # Always assumed that we're dealing with the last state
1342  state = self._last_state
1343  pp = ihm.analysis.EmptyStep()
1344  pp.num_models_begin = pp.num_models_end = num_models
1345  self.all_protocols.add_postproc(pp, state)
1346  return pp
1347 
1348  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1349  num_models_deposited, localization_densities,
1350  ensemble_file):
1351  """Add an ensemble generated by ad hoc methods (not using PMI).
1352  This is currently only used by the Nup84 system."""
1353  # Always assumed that we're dealing with the last state
1354  state = self._last_state
1355  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1356  state.add_model_group(group)
1357  if ensemble_file:
1358  # Deprecated Location class does not state input vs output
1359  if isinstance(ensemble_file, IMP.pmi.metadata.FileLocation):
1360  ensemble_file.content_type = \
1361  ihm.location.OutputFileLocation.content_type
1362  self.system.locations.append(ensemble_file)
1363  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1364  ensemble_file)
1365  self.system.ensembles.append(e)
1366  for c in state.all_modeled_components:
1367  den = localization_densities.get(c, None)
1368  if den:
1369  # Deprecated Location class does not state input vs output
1370  if isinstance(den, IMP.pmi.metadata.FileLocation):
1371  den.content_type = \
1372  ihm.location.OutputFileLocation.content_type
1373  e.load_localization_density(state, c, self.asym_units[c], den)
1374  return e
1375 
1376  def set_ensemble_file(self, i, location):
1377  """Point a previously-created ensemble to an 'all-models' file.
1378  This could be a trajectory such as DCD, an RMF, or a multimodel
1379  PDB file."""
1380  # Deprecated Location class does not state input vs output
1381  if isinstance(location, IMP.pmi.metadata.FileLocation):
1382  location.content_type = ihm.location.OutputFileLocation.content_type
1383  self.system.locations.append(location)
1384  # Ensure that we point to an ensemble related to the current state
1385  ind = i + self._state_ensemble_offset
1386  self.system.ensembles[ind].file = location
1387 
1388  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1389  # todo: add prefilter as an additional postprocess step (complication:
1390  # we don't know how many models it filtered)
1391  # todo: postpone rmsf/density localization extraction until after
1392  # relevant methods are called (currently it is done from the
1393  # constructor)
1394  protocol = self.all_protocols.get_last_protocol(state)
1395  num_models = protocol.steps[-1].num_models_end
1396  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1397  self.all_protocols.add_postproc(pp, state)
1398  for i in range(rex._number_of_clusters):
1399  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1400  'cluster %d' % (i + 1)))
1401  state.add_model_group(group)
1402  # todo: make # of models to deposit configurable somewhere
1403  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1404  self.system.ensembles.append(e)
1405  # Add localization density info if available
1406  for fname, stuple in density_custom_ranges.items():
1407  e.load_localization_density(state, fname, stuple,
1408  self.asym_units)
1409  for stats in e.load_all_models(self, state):
1410  m = self.add_model(group)
1411  # Since we currently only deposit 1 model, it is the
1412  # best scoring one
1413  m.name = 'Best scoring model'
1414  m.stats = stats
1415  # Add RMSF info if available
1416  for c in state.all_modeled_components:
1417  e.load_rmsf(m, c)
1418 
1419  def _get_subassembly(self, comps, name, description):
1420  """Get an Assembly consisting of the given components.
1421  `compdict` is a dictionary of the components to add, where keys
1422  are the component names and values are the sequence ranges (or
1423  None to use all residues in the component)."""
1424  asyms = []
1425  for comp, seqrng in comps.items():
1426  a = self.asym_units[comp]
1427  asyms.append(a if seqrng is None else a(*seqrng))
1428 
1429  a = ihm.Assembly(asyms, name=name, description=description)
1430  return a
1431 
1432  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1433  details):
1434  """Add a basic FoXS fit. This is largely intended for use from the
1435  NPC application."""
1436  assembly = self._get_subassembly({comp:seqrange},
1437  name="SAXS subassembly",
1438  description="All components that fit SAXS data")
1439  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1440  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1441  multi_state=False, radius_of_gyration=rg, details=details)
1442  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1443  self.system.restraints.append(r)
1444  self._add_restraint_dataset(r) # so that all-dataset group works
1445 
1446  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1447  image_resolution, projection_number,
1448  micrographs_number):
1449  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1450  image_resolution, projection_number,
1451  micrographs_number)
1452  self.system.restraints.append(r)
1453  self._add_restraint_dataset(r) # so that all-dataset group works
1454 
1455  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1456  # todo: need to set allow_duplicates on this dataset?
1457  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1458  self.system.restraints.append(r)
1459  self._add_restraint_dataset(r) # so that all-dataset group works
1460 
1461  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1462  sigma, pmi_restraint):
1463  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1464  r = _GeometricRestraint(self, state, pmi_restraint, self._xy_plane,
1465  asym.get_feature(ps),
1466  ihm.restraint.LowerUpperBoundDistanceRestraint(
1467  lower_bound, upper_bound),
1468  sigma)
1469  self.system.restraints.append(r)
1470  self._add_restraint_dataset(r) # so that all-dataset group works
1471 
1472  def _get_membrane(self, tor_R, tor_r, tor_th):
1473  """Get an object representing a half-torus membrane"""
1474  if not hasattr(self, '_seen_membranes'):
1475  self._seen_membranes = {}
1476  # If a membrane already exists with all dimensions within 0.01 of
1477  # this one, reuse it
1478  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1479  if membrane_id not in self._seen_membranes:
1480  m = ihm.geometry.HalfTorus(center=self._center_origin,
1481  transformation=self._identity_transform,
1482  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1483  inner=True, name='Membrane')
1484  self._seen_membranes[membrane_id] = m
1485  return self._seen_membranes[membrane_id]
1486 
1487  def add_membrane_surface_location_restraint(
1488  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1489  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1490  r = _GeometricRestraint(self, state, pmi_restraint,
1491  self._get_membrane(tor_R, tor_r, tor_th),
1492  asym.get_feature(ps),
1493  ihm.restraint.UpperBoundDistanceRestraint(0.),
1494  sigma)
1495  self.system.restraints.append(r)
1496  self._add_restraint_dataset(r) # so that all-dataset group works
1497 
1498  def add_model(self, group, assembly=None, representation=None):
1499  state = self._last_state
1500  if representation is None:
1501  representation = self.default_representation
1502  protocol = self.all_protocols.get_last_protocol(state)
1503  m = _Model(state.prot, self, protocol,
1504  assembly if assembly else state.modeled_assembly,
1505  representation)
1506  group.append(m)
1507  return m
1508 
1509  def _update_locations(self, filelocs):
1510  """Update FileLocation to point to a parent repository, if any"""
1511  all_repos = [m for m in self._metadata
1512  if isinstance(m, ihm.location.Repository)]
1513  for fileloc in filelocs:
1514  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1515 
1516  _metadata = property(lambda self:
1517  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:243
def create_representation
Create a new Representation and return it.
Definition: mmcif.py:1096
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: mmcif.py:1153
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: mmcif.py:1106
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
Class to encode a modeling protocol as mmCIF.
Definition: mmcif.py:1047
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
Classes for attaching metadata to PMI objects.
Definition: pmi/metadata.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1013
Miscellaneous utilities.
Definition: tools.py:1
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:632
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1031
void read_pdb(TextInput input, int model, Hierarchy h)
A single asymmetric unit in the system.
Definition: mmcif.py:1023
A single entity in the system.
Definition: mmcif.py:996
Representation of the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: mmcif.py:1376
Classes to represent data structures used in mmCIF.
Definition: data.py:1
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: mmcif.py:1035
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def finalize
Do any final processing on the class hierarchy.
Definition: mmcif.py:1225
Base class for capturing a modeling protocol.
Definition: output.py:40
Basic utilities for handling cryo-electron microscopy 3D density maps.
Class for easy writing of PDBs, RMFs, and stat files.
Definition: output.py:65
3D rotation class.
Definition: Rotation3D.h:46
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:134
Basic functionality that is expected to be used by a wide variety of IMP users.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: mmcif.py:1009
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...