IMP logo
IMP Reference Guide  2.10.0
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper(object):
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi1.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
69 
70  def __getitem__(self, p):
71  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
72  return protname
73 
74 
75 class _AsymMapper(object):
76  """Map a Particle to an asym_unit"""
77  def __init__(self, simo, prot):
78  self.simo = simo
79  self._cm = _ComponentMapper(prot)
80 
81  def __getitem__(self, p):
82  protname = self._cm[p]
83  return self.simo.asym_units[protname]
84 
85  def get_feature(self, ps):
86  """Get an ihm.restraint.Feature that covers the given particles"""
87  # todo: handle things other than residues
88  rngs = []
89  for p in ps:
90  asym = self[p]
91  # todo: handle overlapping or adjoining ranges
93  rind = IMP.atom.Residue(p).get_index()
94  rngs.append(asym(rind, rind))
96  # PMI Fragments always contain contiguous residues
97  rinds = IMP.atom.Fragment(p).get_residue_indexes()
98  rngs.append(asym(rinds[0], rinds[-1]))
99  else:
100  raise ValueError("Unsupported particle type %s" % str(p))
101  return ihm.restraint.ResidueFeature(rngs)
102 
103 
104 class _AllSoftware(object):
105  def __init__(self, system):
106  self.system = system
107  self.modeller_used = self.phyre2_used = False
108  self.system.software.extend([
109  ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org'),
115  ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi1.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')])
121 
122  def set_modeller_used(self, version, date):
123  if self.modeller_used:
124  return
125  self.modeller_used = True
126  self.system.software.append(ihm.Software(
127  name='MODELLER', classification='comparative modeling',
128  description='Comparative modeling by satisfaction '
129  'of spatial restraints, build ' + date,
130  location='https://salilab.org/modeller/',
131  version=version))
132 
133  def set_phyre2_used(self):
134  if self.phyre2_used:
135  return
136  self.phyre2_used = True
137  self.system.software.append(ihm.Software(
138  name='Phyre2', classification='protein homology modeling',
139  description='Protein Homology/analogY Recognition '
140  'Engine V 2.0',
141  version='2.0',
142  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
143 
144 def _get_fragment_is_rigid(fragment):
145  """Determine whether a fragment is modeled rigidly"""
146  leaves = IMP.atom.get_leaves(fragment.hier)
147  # Assume all leaves are set up as rigid/flexible in the same way
148  return IMP.core.RigidMember.get_is_setup(leaves[0])
149 
150 class _PDBFragment(ihm.representation.ResidueSegment):
151  """Record details about part of a PDB file used as input
152  for a component."""
153  def __init__(self, state, component, start, end, pdb_offset,
154  pdbname, chain, hier, asym_unit):
155  # start, end are PMI residue indexes (not IHM)
156  super(_PDBFragment, self).__init__(
157  asym_unit=asym_unit.pmi_range(start, end),
158  rigid=None, primitive='sphere')
159  self.component, self.start, self.end, self.offset, self.pdbname \
160  = component, start, end, pdb_offset, pdbname
161  self.state, self.chain, self.hier = state, chain, hier
164  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
165 
166  rigid = property(lambda self: _get_fragment_is_rigid(self),
167  lambda self, val: None)
168 
169  def combine(self, other):
170  pass
171 
172 
173 class _BeadsFragment(ihm.representation.FeatureSegment):
174  """Record details about beads used to represent part of a component."""
175  chain = None
176  def __init__(self, state, component, start, end, count, hier, asym_unit):
177  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
178  rigid=None, primitive='sphere', count=count)
179  self.state, self.component, self.hier \
180  = state, component, hier
181 
182  rigid = property(lambda self: _get_fragment_is_rigid(self),
183  lambda self, val: None)
184 
185  def combine(self, other):
186  # todo: don't combine if one fragment is rigid and the other flexible
187  if type(other) == type(self) and \
188  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
189  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
190  other.asym_unit.seq_id_range[1])
191  self.count += other.count
192  return True
193 
194 
195 class _AllModelRepresentations(object):
196  def __init__(self, simo):
197  self.simo = simo
198  # dict of fragments, ordered by representation, then component name,
199  # then state
200  self.fragments = OrderedDict()
201  self._all_representations = {}
202 
203  def copy_component(self, state, name, original, asym_unit):
204  """Copy all representation for `original` in `state` to `name`"""
205  def copy_frag(f):
206  newf = copy.copy(f)
207  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
208  return newf
209  for rep in self.fragments:
210  if original in self.fragments[rep]:
211  if name not in self.fragments[rep]:
212  self.fragments[rep][name] = OrderedDict()
213  self.fragments[rep][name][state] = [copy_frag(f)
214  for f in self.fragments[rep][original][state]]
215  # Assume representation for a component is the same in all
216  # states, so only write out the first one
217  first_state = list(self.fragments[rep][name].keys())[0]
218  if state is first_state:
219  representation = self._all_representations[rep]
220  representation.extend(self.fragments[rep][name][state])
221 
222  def add_fragment(self, state, representation, fragment):
223  """Add a model fragment."""
224  comp = fragment.component
225  id_rep = id(representation)
226  self._all_representations[id_rep] = representation
227  if id_rep not in self.fragments:
228  self.fragments[id_rep] = OrderedDict()
229  if comp not in self.fragments[id_rep]:
230  self.fragments[id_rep][comp] = OrderedDict()
231  if state not in self.fragments[id_rep][comp]:
232  self.fragments[id_rep][comp][state] = []
233  fragments = self.fragments[id_rep][comp][state]
234  if len(fragments) == 0 or not fragments[-1].combine(fragment):
235  fragments.append(fragment)
236  # Assume representation for a component is the same in all states,
237  # so only write out the first one
238  first_state = list(self.fragments[id_rep][comp].keys())[0]
239  if state is first_state:
240  representation.append(fragment)
241 
242 
243 class _AllDatasets(object):
244  """Track all datasets generated by PMI and add them to the ihm.System"""
245  def __init__(self, system):
246  self.system = system
247  self._datasets = []
248  self._datasets_by_state = {}
249  self._restraints_by_state = {}
250 
251  def get_all_group(self, state):
252  """Get a DatasetGroup encompassing all datasets so far in this state"""
253  # Note that if a restraint dataset is replaced after this is called,
254  # the group will still contain the old dataset - mark dataset as read
255  # only?
256  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
257  + [r.dataset for r in self._restraints_by_state.get(state, [])
258  if r.dataset])
259  return g
260 
261  def add(self, state, dataset):
262  """Add a new dataset."""
263  self._datasets.append(dataset)
264  if state not in self._datasets_by_state:
265  self._datasets_by_state[state] = []
266  self._datasets_by_state[state].append(dataset)
267  # Make sure that the dataset ends up in the mmCIF file even if nothing
268  # refers to it
269  self.system.orphan_datasets.append(dataset)
270  return dataset
271 
272  def add_restraint(self, state, restraint):
273  """Add the dataset for a restraint"""
274  if state not in self._restraints_by_state:
275  self._restraints_by_state[state] = []
276  self._restraints_by_state[state].append(restraint)
277 
278 
279 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
280  """Restrain to a set of cross links"""
281 
282  assembly = None
283  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
284 
285  def __init__(self, pmi_restraint):
286  self.pmi_restraint = pmi_restraint
287  label = self.pmi_restraint.label
288  # Map commonly-used subtypes to more standard labels
289  label = self._label_map.get(label, label)
290  self.label = label
291  super(_CrossLinkRestraint, self).__init__(
292  dataset=self.pmi_restraint.dataset,
293  linker_type=label)
294 
295  def _set_psi_sigma(self, model):
296  old_values = []
297  for resolution in self.pmi_restraint.sigma_dictionary:
298  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
299  if model.stats and statname in model.stats:
300  sigma = float(model.stats[statname])
301  p = self.pmi_restraint.sigma_dictionary[resolution][0]
302  old_values.append((p, p.get_scale()))
303  p.set_scale(sigma)
304  for psiindex in self.pmi_restraint.psi_dictionary:
305  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
306  if model.stats and statname in model.stats:
307  psi = float(model.stats[statname])
308  p = self.pmi_restraint.psi_dictionary[psiindex][0]
309  old_values.append((p, p.get_scale()))
310  p.set_scale(psi)
311  # If the same particle is set multiple times, make sure we get the
312  # original value last
313  return list(reversed(old_values))
314 
315  def add_fits_from_model_statfile(self, model):
316  # Set psi/sigma for all particles
317  old_values = self._set_psi_sigma(model)
318  # If no stats were collected, we can't show the fit
319  if not old_values:
320  return
321  for xl in self.cross_links:
322  # Get current psi/sigma particle value for each XL
323  xl.fits[model] = ihm.restraint.CrossLinkFit(
324  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
325  # Restore original psi/sigma
326  for p, scale in old_values:
327  p.set_scale(scale)
328 
329  # Have our dataset point to that in the original PMI restraint
330  def __set_dataset(self, val):
331  self.pmi_restraint.dataset = val
332  dataset = property(lambda self: self.pmi_restraint.dataset,
333  __set_dataset)
334 
335 
336 def get_asym_mapper_for_state(simo, state, asym_map):
337  asym = asym_map.get(state, None)
338  if asym is None:
339  asym = _AsymMapper(simo, state.prot)
340  asym_map[state] = asym
341  return asym
342 
343 class _PMICrossLink(object):
344  # Query PMI particles to get psi and sigma values at time of use, not
345  # time of construction (since they may be set after creating the restraint)
346  psi = property(lambda self: self.psi_p.get_scale(),
347  lambda self, val: None)
348  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
349  lambda self, val: None)
350  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
351  lambda self, val: None)
352 
353 
354 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
355  pass
356 
357 
358 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
359  pass
360 
361 
362 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
363  def __init__(self, state, pmi_restraint, image_number, resolution,
364  pixel_size, image_resolution, projection_number,
365  micrographs_number):
366  self.pmi_restraint, self.image_number = pmi_restraint, image_number
367  super(_EM2DRestraint, self).__init__(
368  dataset=pmi_restraint.datasets[image_number],
369  assembly=state.modeled_assembly,
370  segment=False, number_raw_micrographs=micrographs_number,
371  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
372  image_resolution=image_resolution,
373  number_of_projections=projection_number)
374 
375  # Have our dataset point to that in the original PMI restraint
376  def __get_dataset(self):
377  return self.pmi_restraint.datasets[self.image_number]
378  def __set_dataset(self, val):
379  self.pmi_restraint.datasets[self.image_number] = val
380  dataset = property(__get_dataset, __set_dataset)
381 
382  def add_fits_from_model_statfile(self, model):
383  ccc = self._get_cross_correlation(model)
384  transform = self._get_transformation(model)
385  rot = transform.get_rotation()
386  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
387  self.fits[model] = ihm.restraint.EM2DRestraintFit(
388  cross_correlation_coefficient=ccc,
389  rot_matrix=rm,
390  tr_vector=transform.get_translation())
391 
392  def _get_transformation(self, model):
393  """Get the transformation that places the model on the image"""
394  stats = model.em2d_stats or model.stats
395  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
396  self.image_number + 1)
397  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
398  t = [float(stats[prefix + '_Translation%d' % i])
399  for i in range(3)]
400  # If the model coordinates are transformed, need to back transform
401  # them first
402  inv = model.transform.get_inverse()
404  IMP.algebra.Vector3D(*t)) * inv
405 
406  def _get_cross_correlation(self, model):
407  """Get the cross correlation coefficient between the model projection
408  and the image"""
409  stats = model.em2d_stats or model.stats
410  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
411  % (self.pmi_restraint.label,
412  self.image_number + 1)])
413 
414 
415 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
416 
417  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
418  self.pmi_restraint = pmi_restraint
419  super(_EM3DRestraint, self).__init__(
420  dataset=pmi_restraint.dataset,
421  assembly=self._get_assembly(densities, simo, state),
422  fitting_method='Gaussian mixture models',
423  number_of_gaussians=len(target_ps))
424 
425  # Have our dataset point to that in the original PMI restraint
426  def __set_dataset(self, val):
427  self.pmi_restraint.dataset = val
428  dataset = property(lambda self: self.pmi_restraint.dataset,
429  __set_dataset)
430 
431  def _get_assembly(self, densities, simo, state):
432  """Get the Assembly that this restraint acts on"""
433  cm = _ComponentMapper(state.prot)
434  components = {}
435  for d in densities:
436  components[cm[d]] = None # None == all residues in this component
437  a = simo._get_subassembly(components,
438  name="EM subassembly",
439  description="All components that fit the EM map")
440  return a
441 
442  def add_fits_from_model_statfile(self, model):
443  ccc = self._get_cross_correlation(model)
444  self.fits[model] = ihm.restraint.EM3DRestraintFit(
445  cross_correlation_coefficient=ccc)
446 
447  def _get_cross_correlation(self, model):
448  """Get the cross correlation coefficient between the model
449  and the map"""
450  if model.stats is not None:
451  return float(model.stats['GaussianEMRestraint_%s_CCC'
452  % self.pmi_restraint.label])
453 
454 
455 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
456 
457  def __init__(self, simo, state, pmi_restraint, geometric_object,
458  feature, distance, sigma):
459  self.pmi_restraint = pmi_restraint
460  super(_GeometricRestraint, self).__init__(
461  dataset=pmi_restraint.dataset,
462  geometric_object=geometric_object, feature=feature,
463  distance=distance, harmonic_force_constant = 1. / sigma,
464  restrain_all=True)
465 
466  # Have our dataset point to that in the original PMI restraint
467  def __set_dataset(self, val):
468  self.pmi_restraint.dataset = val
469  dataset = property(lambda self: self.pmi_restraint.dataset,
470  __set_dataset)
471 
472 
473 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
474  def __init__(self, state, rex):
475  if rex.monte_carlo_sample_objects is not None:
476  method = 'Replica exchange monte carlo'
477  else:
478  method = 'Replica exchange molecular dynamics'
479  super(_ReplicaExchangeProtocolStep, self).__init__(
480  assembly=state.modeled_assembly,
481  dataset_group=None, # filled in by add_step()
482  method=method, name='Sampling',
483  num_models_begin=None, # filled in by add_step()
484  num_models_end=rex.vars["number_of_frames"],
485  multi_scale=True, multi_state=False, ordered=False)
486 
487 
488 class _SimpleProtocolStep(ihm.protocol.Step):
489  def __init__(self, state, num_models_end, method):
490  super(_SimpleProtocolStep, self).__init__(
491  assembly=state.modeled_assembly,
492  dataset_group=None, # filled in by add_step()
493  method=method, name='Sampling',
494  num_models_begin=None, # filled in by add_step()
495  num_models_end=num_models_end,
496  multi_scale=True, multi_state=False, ordered=False)
497 
498 
499 class _Chain(object):
500  """Represent a single chain in a Model"""
501  def __init__(self, pmi_chain_id, asym_unit):
502  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
503  self.spheres = []
504  self.atoms = []
505 
506  def add(self, xyz, atom_type, residue_type, residue_index,
507  all_indexes, radius):
508  if atom_type is None:
509  self.spheres.append((xyz, residue_type, residue_index,
510  all_indexes, radius))
511  else:
512  self.atoms.append((xyz, atom_type, residue_type, residue_index,
513  all_indexes, radius))
514  orig_comp = property(lambda self: self.comp)
515 
516 class _TransformedChain(object):
517  """Represent a chain that is a transformed version of another"""
518  def __init__(self, orig_chain, asym_unit, transform):
519  self.orig_chain, self.asym_unit = orig_chain, asym_unit
520  self.transform = transform
521 
522  def __get_spheres(self):
523  for (xyz, residue_type, residue_index, all_indexes,
524  radius) in self.orig_chain.spheres:
525  yield (self.transform * xyz, residue_type, residue_index,
526  all_indexes, radius)
527  spheres = property(__get_spheres)
528 
529  def __get_atoms(self):
530  for (xyz, atom_type, residue_type, residue_index, all_indexes,
531  radius) in self.orig_chain.atoms:
532  yield (self.transform * xyz, atom_type, residue_type, residue_index,
533  all_indexes, radius)
534  atoms = property(__get_atoms)
535 
536  entity = property(lambda self: self.orig_chain.entity)
537  orig_comp = property(lambda self: self.orig_chain.comp)
538 
539 class _Excluder(object):
540  def __init__(self, component, simo):
541  self._seqranges = simo._exclude_coords.get(component, [])
542 
543  def is_excluded(self, indexes):
544  """Return True iff the given sequence range is excluded."""
545  for seqrange in self._seqranges:
546  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
547  return True
548 
549 
550 class _Model(ihm.model.Model):
551  def __init__(self, prot, simo, protocol, assembly, representation):
552  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
553  representation=representation)
554  self.simo = weakref.proxy(simo)
555  # Transformation from IMP coordinates into mmCIF coordinate system.
556  # Normally we pass through coordinates unchanged, but in some cases
557  # we may want to translate them (e.g. Nup84, where the deposited PDB
558  # files are all centered; we want the mmCIF files to match)
560  self.em2d_stats = None
561  self.stats = None
562  # True iff restraints act on this model
563  self._is_restrained = True
564  o = self.output = IMP.pmi1.output.Output(atomistic=True)
565  name = 'cif-output'
566  o.dictionary_pdbs[name] = prot
567  o._init_dictchain(name, prot, multichar_chain=True)
568  (particle_infos_for_pdb,
569  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
570  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
571  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
572  self.rmsf = {}
573 
574  def all_chains(self, simo):
575  """Yield all chains, including transformed ones"""
576  chain_for_comp = {}
577  for c in self.chains:
578  yield c
579  chain_for_comp[c.comp] = c
580  for tc in simo._transformed_components:
581  orig_chain = chain_for_comp.get(tc.original, None)
582  if orig_chain:
583  asym = simo.asym_units[tc.name]
584  c = _TransformedChain(orig_chain, asym, tc.transform)
585  c.comp = tc.name
586  yield c
587 
588  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
589  entity_for_chain = {}
590  comp_for_chain = {}
591  correct_asym = {}
592  for protname, chain_id in o.dictchain[name].items():
593  entity_for_chain[chain_id] = simo.entities[protname]
594  comp_for_chain[chain_id] = protname
595  # When doing multi-state modeling, the chain ID returned here
596  # (assigned sequentially) might not be correct (states may have
597  # gaps in the chain IDs). Map it to the correct asym unit.
598  correct_asym[chain_id] = simo.asym_units[protname]
599 
600  # Gather by chain ID (should be sorted by chain ID already)
601  self.chains = []
602  chain = None
603  excluder = None
604 
605  for (xyz, atom_type, residue_type, chain_id, residue_index,
606  all_indexes, radius) in particle_infos_for_pdb:
607  if chain is None or chain.pmi_chain_id != chain_id:
608  chain = _Chain(chain_id, correct_asym[chain_id])
609  chain.entity = entity_for_chain[chain_id]
610  chain.comp = comp_for_chain[chain_id]
611  self.chains.append(chain)
612  excluder = _Excluder(chain.comp, simo)
613  if not excluder.is_excluded(all_indexes if all_indexes
614  else [residue_index]):
615  chain.add(xyz, atom_type, residue_type, residue_index,
616  all_indexes, radius)
617 
618  def parse_rmsf_file(self, fname, component):
619  self.rmsf[component] = rmsf = {}
620  with open(fname) as fh:
621  for line in fh:
622  resnum, blocknum, val = line.split()
623  rmsf[int(resnum)] = (int(blocknum), float(val))
624 
625  def get_rmsf(self, component, indexes):
626  """Get the RMSF value for the given residue indexes."""
627  if not self.rmsf:
628  return None
629  rmsf = self.rmsf[component]
630  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
631  if len(blocknums) != 1:
632  raise ValueError("Residue indexes %s aren't all in the same block"
633  % str(indexes))
634  return rmsf[indexes[0]][1]
635 
636  def get_atoms(self):
637  for chain in self.all_chains(self.simo):
638  pmi_offset = chain.asym_unit.entity.pmi_offset
639  for atom in chain.atoms:
640  (xyz, atom_type, residue_type, residue_index,
641  all_indexes, radius) = atom
642  pt = self.transform * xyz
643  yield ihm.model.Atom(asym_unit=chain.asym_unit,
644  seq_id=residue_index - pmi_offset,
645  atom_id=atom_type.get_string(),
646  type_symbol=None, # todo: get element
647  x=pt[0], y=pt[1], z=pt[2])
648 
649  def get_spheres(self):
650  for chain in self.all_chains(self.simo):
651  pmi_offset = chain.asym_unit.entity.pmi_offset
652  for sphere in chain.spheres:
653  (xyz, residue_type, residue_index,
654  all_indexes, radius) = sphere
655  if all_indexes is None:
656  all_indexes = (residue_index,)
657  pt = self.transform * xyz
658  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
659  seq_id_range=(all_indexes[0] - pmi_offset,
660  all_indexes[-1] - pmi_offset),
661  x=pt[0], y=pt[1], z=pt[2], radius=radius,
662  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
663 
664 
665 class _AllProtocols(object):
666  def __init__(self, simo):
667  self.simo = weakref.proxy(simo)
668  # Lists of Protocols by state
669  self.protocols = OrderedDict()
670 
671  def add_protocol(self, state):
672  """Add a new Protocol"""
673  if state not in self.protocols:
674  self.protocols[state] = []
675  p = ihm.protocol.Protocol()
676  self.simo.system.orphan_protocols.append(p)
677  self.protocols[state].append(p)
678 
679  def add_step(self, step, state):
680  """Add a ProtocolStep to the last Protocol of the given State"""
681  if state not in self.protocols:
682  self.add_protocol(state)
683  protocol = self.get_last_protocol(state)
684  if len(protocol.steps) == 0:
685  step.num_models_begin = 0
686  else:
687  step.num_models_begin = protocol.steps[-1].num_models_end
688  protocol.steps.append(step)
689  step.id = len(protocol.steps)
690  # Assume that protocol uses all currently-defined datasets
691  step.dataset_group = self.simo.all_datasets.get_all_group(state)
692 
693  def add_postproc(self, step, state):
694  """Add a postprocessing step to the last protocol"""
695  protocol = self.get_last_protocol(state)
696  if len(protocol.analyses) == 0:
697  protocol.analyses.append(ihm.analysis.Analysis())
698  protocol.analyses[-1].steps.append(step)
699 
700  def get_last_protocol(self, state):
701  """Return the most recently-added _Protocol"""
702  return self.protocols[state][-1]
703 
704 
705 class _AllStartingModels(object):
706  def __init__(self, simo):
707  self.simo = simo
708  # dict of starting models (entire PDB files), collected from fragments,
709  # ordered by component name and state
710  self.models = OrderedDict()
711  self.output = IMP.pmi1.output.Output()
712 
713  def add_pdb_fragment(self, fragment):
714  """Add a starting model PDB fragment."""
715  comp = fragment.component
716  state = fragment.state
717  if comp not in self.models:
718  self.models[comp] = OrderedDict()
719  if state not in self.models[comp]:
720  self.models[comp][state] = []
721  models = self.models[comp][state]
722  if len(models) == 0 \
723  or models[-1].fragments[0].pdbname != fragment.pdbname:
724  model = self._add_model(fragment)
725  models.append(model)
726  else:
727  # Break circular ref between fragment and model
728  models[-1].fragments.append(weakref.proxy(fragment))
729  # Update residue range to cover all fragments
730  pmi_offset = models[-1].asym_unit.entity.pmi_offset
731  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
732  models[-1].asym_unit.seq_id_range[0])
733  sid_end = max(fragment.end + fragment.offset - pmi_offset,
734  models[-1].asym_unit.seq_id_range[1])
735  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
736  fragment.starting_model = models[-1]
737 
738  def _add_model(self, f):
739  parser = ihm.metadata.PDBParser()
740  r = parser.parse_file(f.pdbname)
741 
742  self.simo._add_dataset(r['dataset'])
743  # We only want the templates that model the starting model chain
744  templates = r['templates'].get(f.chain, [])
745  for t in templates:
746  if t.alignment_file:
747  self.simo.system.locations.append(t.alignment_file)
748  if t.dataset:
749  self.simo._add_dataset(t.dataset)
750  pmi_offset = f.asym_unit.entity.pmi_offset
751  m = _StartingModel(
752  asym_unit=f.asym_unit.asym.pmi_range(f.start + f.offset,
753  f.end + f.offset),
754  dataset=r['dataset'], asym_id=f.chain,
755  templates=templates, offset=f.offset + pmi_offset,
756  metadata=r['metadata'],
757  software=r['software'][0] if r['software'] else None,
758  script_file=r['script'])
759  m.fragments = [weakref.proxy(f)]
760  return m
761 
762 
763 class _StartingModel(ihm.startmodel.StartingModel):
764  def get_seq_dif(self):
765  return self._seq_dif
766 
767  def get_atoms(self):
768  pmi_offset = self.asym_unit.entity.pmi_offset
769  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
770  self.asym_unit)
771  for f in self.fragments:
772  sel = IMP.atom.Selection(f.starting_hier,
773  residue_indexes=list(range(f.start - f.offset,
774  f.end - f.offset + 1)))
775  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
776  f.offset - pmi_offset):
777  yield a
778  self._seq_dif = mh._seq_dif
779 
780 
781 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
782  """Post processing using AnalysisReplicaExchange0 macro"""
783 
784  def __init__(self, rex, num_models_begin):
785  self.rex = rex
786  num_models_end = 0
787  for fname in self.get_all_stat_files():
788  with open(fname) as fh:
789  num_models_end += len(fh.readlines())
790  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
791  feature='RMSD', num_models_begin=num_models_begin,
792  num_models_end=num_models_end)
793 
794  def get_stat_file(self, cluster_num):
795  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
796  'stat.out')
797 
798  def get_all_stat_files(self):
799  for i in range(self.rex._number_of_clusters):
800  yield self.get_stat_file(i)
801 
802 
803 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
804  """Ensemble generated using AnalysisReplicaExchange0 macro"""
805 
806  num_models_deposited = None # Override base class property
807 
808  def __init__(self, pp, cluster_num, model_group, num_deposit):
809  with open(pp.get_stat_file(cluster_num)) as fh:
810  num_models = len(fh.readlines())
811  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
812  num_models=num_models,
813  model_group=model_group, post_process=pp,
814  clustering_feature=pp.feature,
815  name=model_group.name)
816  self.cluster_num = cluster_num
817  self.num_models_deposited = num_deposit
818 
819  def get_rmsf_file(self, component):
820  return os.path.join(self.post_process.rex._outputdir,
821  'cluster.%d' % self.cluster_num,
822  'rmsf.%s.dat' % component)
823 
824  def load_rmsf(self, model, component):
825  fname = self.get_rmsf_file(component)
826  if os.path.exists(fname):
827  model.parse_rmsf_file(fname, component)
828 
829  def get_localization_density_file(self, fname):
830  return os.path.join(self.post_process.rex._outputdir,
831  'cluster.%d' % self.cluster_num,
832  '%s.mrc' % fname)
833 
834  def load_localization_density(self, state, fname, select_tuple, asym_units):
835  fullpath = self.get_localization_density_file(fname)
836  if os.path.exists(fullpath):
837  details = "Localization density for %s %s" \
838  % (fname, self.model_group.name)
839  local_file = ihm.location.OutputFileLocation(fullpath,
840  details=details)
841  for s in select_tuple:
842  if isinstance(s, tuple) and len(s) == 3:
843  asym = asym_units[s[2]].pmi_range(s[0], s[1])
844  else:
845  asym = asym_units[s]
846  den = ihm.model.LocalizationDensity(file=local_file,
847  asym_unit=asym)
848  self.densities.append(den)
849 
850  def load_all_models(self, simo, state):
851  stat_fname = self.post_process.get_stat_file(self.cluster_num)
852  model_num = 0
853  with open(stat_fname) as fh:
854  stats = ast.literal_eval(fh.readline())
855  # Correct path
856  rmf_file = os.path.join(os.path.dirname(stat_fname),
857  "%d.rmf3" % model_num)
858  for c in state.all_modeled_components:
859  # todo: this only works with PMI 1
860  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
861  force_rigid_update=True)
862  # todo: fill in other data from stat file, e.g. crosslink phi/psi
863  yield stats
864  model_num += 1
865  if model_num >= self.num_models_deposited:
866  return
867 
868  # todo: also support dRMS precision
869  def _get_precision(self):
870  precfile = os.path.join(self.post_process.rex._outputdir,
871  "precision.%d.%d.out" % (self.cluster_num,
872  self.cluster_num))
873  if not os.path.exists(precfile):
874  return ihm.unknown
875  # Fail if the precision.x.x.out file doesn't match the cluster
876  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
877  % self.cluster_num)
878  with open(precfile) as fh:
879  for line in fh:
880  m = r.match(line)
881  if m:
882  return float(m.group(1))
883 
884  precision = property(lambda self: self._get_precision(),
885  lambda self, val: None)
886 
887 class _SimpleEnsemble(ihm.model.Ensemble):
888  """Simple manually-created ensemble"""
889 
890  num_models_deposited = None # Override base class property
891 
892  def __init__(self, pp, model_group, num_models, drmsd,
893  num_models_deposited, ensemble_file):
894  super(_SimpleEnsemble, self).__init__(
895  model_group=model_group, post_process=pp, num_models=num_models,
896  file=ensemble_file, precision=drmsd, name=model_group.name,
897  clustering_feature='dRMSD')
898  self.num_models_deposited = num_models_deposited
899 
900  def load_localization_density(self, state, component, asym, local_file):
901  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
902  self.densities.append(den)
903 
904 
905 class _EntityMapper(dict):
906  """Handle mapping from IMP components to CIF entities.
907  Multiple components may map to the same entity if they share sequence."""
908  def __init__(self, system):
909  super(_EntityMapper, self).__init__()
910  self._sequence_dict = {}
911  self._entities = []
912  self.system = system
913 
914  def add(self, component_name, sequence, offset):
915  def entity_seq(sequence):
916  # Map X to UNK
917  if 'X' in sequence:
918  return ['UNK' if s == 'X' else s for s in sequence]
919  else:
920  return sequence
921  if sequence not in self._sequence_dict:
922  # Use the name of the first component, stripped of any copy number,
923  # as the description of the entity
924  d = component_name.split("@")[0].split(".")[0]
925  entity = Entity(entity_seq(sequence), description=d,
926  pmi_offset=offset)
927  self.system.entities.append(entity)
928  self._sequence_dict[sequence] = entity
929  self[component_name] = self._sequence_dict[sequence]
930 
931 
932 class _TransformedComponent(object):
933  def __init__(self, name, original, transform):
934  self.name, self.original, self.transform = name, original, transform
935 
936 
937 class _SimpleRef(object):
938  """Class with similar interface to weakref.ref, but keeps a strong ref"""
939  def __init__(self, ref):
940  self.ref = ref
941  def __call__(self):
942  return self.ref
943 
944 
945 class _State(ihm.model.State):
946  """Representation of a single state in the system."""
947 
948  def __init__(self, pmi_object, po):
949  # Point to the PMI object for this state. Use a weak reference
950  # since the state object typically points to us too, so we need
951  # to break the reference cycle. In PMI1 this will be a
952  # Representation object; in PMI2 it is the PMI2 State object itself.
953  self._pmi_object = weakref.proxy(pmi_object)
954  if hasattr(pmi_object, 'state'):
955  # Need a strong ref to pmi_object.state to prevent it from being
956  # cleaned up when doing PMI1 multistate modeling
957  self._pmi_state = _SimpleRef(pmi_object.state)
958  else:
959  self._pmi_state = weakref.ref(pmi_object)
960  # Preserve PMI state name
961  old_name = self.name
962  super(_State, self).__init__(experiment_type='Fraction of bulk')
963  self.name = old_name
964 
965  # The assembly of all components modeled by IMP in this state.
966  # This may be smaller than the complete assembly.
967  self.modeled_assembly = ihm.Assembly(
968  name="Modeled assembly",
969  description=self.get_postfixed_name(
970  "All components modeled by IMP"))
971  po.system.orphan_assemblies.append(self.modeled_assembly)
972 
973  self.all_modeled_components = []
974 
975  def __hash__(self):
976  return hash(self._pmi_state())
977  def __eq__(self, other):
978  return self._pmi_state() == other._pmi_state()
979 
980  def add_model_group(self, group):
981  self.append(group)
982  return group
983 
984  def get_prefixed_name(self, name):
985  """Prefix the given name with the state name, if available."""
986  if self.short_name:
987  return self.short_name + ' ' + name
988  else:
989  # Can't use capitalize() since that un-capitalizes everything
990  # but the first letter
991  return name[0].upper() + name[1:] if name else ''
992 
993  def get_postfixed_name(self, name):
994  """Postfix the given name with the state name, if available."""
995  if self.short_name:
996  return "%s in state %s" % (name, self.short_name)
997  else:
998  return name
999 
1000  short_name = property(lambda self: self._pmi_state().short_name)
1001  long_name = property(lambda self: self._pmi_state().long_name)
1002  def __get_name(self):
1003  return self._pmi_state().long_name
1004  def __set_name(self, val):
1005  self._pmi_state().long_name = val
1006  name = property(__get_name, __set_name)
1007 
1008 
1009 class Entity(ihm.Entity):
1010  """A single entity in the system.
1011  This functions identically to the base ihm.Entity class, but it
1012  allows identifying residues by either the IHM numbering scheme
1013  (seq_id, which is always contiguous starting at 1) or by the PMI
1014  scheme (which is similar, but may not start at 1 if the sequence in
1015  the FASTA file has one or more N-terminal gaps"""
1016  def __init__(self, sequence, pmi_offset, *args, **keys):
1017  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1018  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1019  self.pmi_offset = pmi_offset
1020  super(Entity, self).__init__(sequence, *args, **keys)
1021 
1022  def pmi_residue(self, res_id):
1023  """Return a single IHM residue indexed using PMI numbering"""
1024  return self.residue(res_id - self.pmi_offset)
1025 
1026  def pmi_range(self, res_id_begin, res_id_end):
1027  """Return a range of IHM residues indexed using PMI numbering"""
1028  off = self.pmi_offset
1029  return self(res_id_begin - off, res_id_end - off)
1030 
1031 
1032 class AsymUnit(ihm.AsymUnit):
1033  """A single asymmetric unit in the system.
1034  This functions identically to the base ihm.AsymUnit class, but it
1035  allows identifying residues by either the IHM numbering scheme
1036  (seq_id, which is always contiguous starting at 1) or by the PMI
1037  scheme (which is similar, but may not start at 1 if the sequence in
1038  the FASTA file has one or more N-terminal gaps"""
1039 
1040  def __init__(self, entity, *args, **keys):
1041  super(AsymUnit, self).__init__(
1042  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1043 
1044  def pmi_residue(self, res_id):
1045  """Return a single IHM residue indexed using PMI numbering"""
1046  return self.residue(res_id - self.entity.pmi_offset)
1047 
1048  def pmi_range(self, res_id_begin, res_id_end):
1049  """Return a range of IHM residues indexed using PMI numbering"""
1050  off = self.entity.pmi_offset
1051  return self(res_id_begin - off, res_id_end - off)
1052 
1053 
1055  """Class to encode a modeling protocol as mmCIF.
1056 
1057  IMP has basic support for writing out files in mmCIF format, for
1058  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1059  After creating an instance of this class, attach it to an
1060  IMP.pmi1.representation.Representation object. After this, any
1061  generated models and metadata are automatically collected and
1062  output as mmCIF.
1063  """
1064  def __init__(self, fh):
1065  # Ultimately, collect data in an ihm.System object
1066  self.system = ihm.System()
1067  self._state_group = ihm.model.StateGroup()
1068  self.system.state_groups.append(self._state_group)
1069 
1070  self.fh = fh
1071  self._state_ensemble_offset = 0
1072  self._each_metadata = [] # list of metadata for each representation
1073  self._file_datasets = []
1074  self._main_script = os.path.abspath(sys.argv[0])
1075 
1076  # Point to the main modeling script
1077  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1078  details="The main integrative modeling script")
1079  self.system.locations.append(loc)
1080 
1081  self._states = {}
1082  self.__asym_states = {}
1083  self._working_directory = os.getcwd()
1084  self.default_representation = self.create_representation(
1085  "Default representation")
1086  self.entities = _EntityMapper(self.system)
1087  # Mapping from component names to ihm.AsymUnit
1088  self.asym_units = {}
1089  self._all_components = {}
1090  self.all_modeled_components = []
1091  self._transformed_components = []
1092  self.sequence_dict = {}
1093 
1094  # Common geometric objects used in PMI systems
1095  self._xy_plane = ihm.geometry.XYPlane()
1096  self._center_origin = ihm.geometry.Center(0,0,0)
1097  self._identity_transform = ihm.geometry.Transformation.identity()
1098 
1099  # Coordinates to exclude
1100  self._exclude_coords = {}
1101 
1102  self.all_representations = _AllModelRepresentations(self)
1103  self.all_protocols = _AllProtocols(self)
1104  self.all_datasets = _AllDatasets(self.system)
1105  self.all_starting_models = _AllStartingModels(self)
1106 
1107  self.all_software = _AllSoftware(self.system)
1108 
1109  def create_representation(self, name):
1110  """Create a new Representation and return it. This can be
1111  passed to add_model(), add_bead_element() or add_pdb_element()."""
1112  r = ihm.representation.Representation()
1113  self.system.orphan_representations.append(r)
1114  return r
1115 
1116  def exclude_coordinates(self, component, seqrange):
1117  """Don't record coordinates for the given domain.
1118  Coordinates for the given domain (specified by a component name
1119  and a 2-element tuple giving the start and end residue numbers)
1120  will be excluded from the mmCIF file. This can be used to exclude
1121  parts of the structure that weren't well resolved in modeling.
1122  Any bead or residue that lies wholly within this range will be
1123  excluded. Multiple ranges for a given component can be excluded
1124  by calling this method multiple times."""
1125  if component not in self._exclude_coords:
1126  self._exclude_coords[component] = []
1127  self._exclude_coords[component].append(seqrange)
1128 
1129  def _is_excluded(self, component, start, end):
1130  """Return True iff this chunk of sequence should be excluded"""
1131  for seqrange in self._exclude_coords.get(component, ()):
1132  if start >= seqrange[0] and end <= seqrange[1]:
1133  return True
1134 
1135  def _add_state(self, state):
1136  """Create a new state and return a pointer to it."""
1137  self._state_ensemble_offset = len(self.system.ensembles)
1138  s = _State(state, self)
1139  self._state_group.append(s)
1140  self._last_state = s
1141  return s
1142 
1143  def get_file_dataset(self, fname):
1144  for d in self._file_datasets:
1145  fd = d.get(os.path.abspath(fname), None)
1146  if fd:
1147  return fd
1148 
1149  def _get_chain_for_component(self, name, output):
1150  """Get the chain ID for a component, if any."""
1151  # todo: handle multiple copies
1152  if name in self.asym_units:
1153  return self.asym_units[name]._id
1154  else:
1155  # A non-modeled component doesn't have a chain ID
1156  return None
1157 
1158  def _get_assembly_comps(self, assembly):
1159  """Get the names of the components in the given assembly"""
1160  # component name = asym_unit.details
1161  comps = {}
1162  for ca in assembly:
1163  comps[ca.details] = None
1164  return comps
1165 
1166  def create_transformed_component(self, state, name, original, transform):
1167  """Make a new component that's a transformed copy of another.
1168  All representation for the existing component is copied to the
1169  new one."""
1170  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1171  if name in assembly_comps:
1172  raise ValueError("Component %s already exists" % name)
1173  elif original not in assembly_comps:
1174  raise ValueError("Original component %s does not exist" % original)
1175  self.create_component(state, name, True)
1176  self.add_component_sequence(state, name, self.sequence_dict[original])
1177  self._transformed_components.append(_TransformedComponent(
1178  name, original, transform))
1179  self.all_representations.copy_component(state, name, original,
1180  self.asym_units[name])
1181 
1182  def create_component(self, state, name, modeled):
1183  new_comp = name not in self._all_components
1184  self._all_components[name] = None
1185  if modeled:
1186  state.all_modeled_components.append(name)
1187  if new_comp:
1188  self.asym_units[name] = None # assign asym once we get sequence
1189  self.all_modeled_components.append(name)
1190 
1191  def add_component_sequence(self, state, name, seq):
1192  def get_offset(seq):
1193  # Count length of gaps at start of sequence
1194  for i in range(len(seq)):
1195  if seq[i] != '-':
1196  return seq[i:], i
1197  seq, offset = get_offset(seq)
1198  if name in self.sequence_dict:
1199  if self.sequence_dict[name] != seq:
1200  raise ValueError("Sequence mismatch for component %s" % name)
1201  else:
1202  self.sequence_dict[name] = seq
1203  self.entities.add(name, seq, offset)
1204  if name in self.asym_units:
1205  if self.asym_units[name] is None:
1206  # Set up a new asymmetric unit for this component
1207  entity = self.entities[name]
1208  asym = AsymUnit(entity, details=name)
1209  self.system.asym_units.append(asym)
1210  self.asym_units[name] = asym
1211  state.modeled_assembly.append(self.asym_units[name])
1212 
1213  def _add_restraint_model_fits(self):
1214  """Add fits to restraints for all known models"""
1215  for group, m in self.system._all_models():
1216  if m._is_restrained:
1217  for r in self.system.restraints:
1218  if hasattr(r, 'add_fits_from_model_statfile'):
1219  r.add_fits_from_model_statfile(m)
1220 
1221  def _add_em2d_raw_micrographs(self):
1222  """If the deprecated metadata.EMMicrographsDataset class was used,
1223  transfer its data to the EM2D restraint."""
1224  for r in self.system.restraints:
1225  if isinstance(r, _EM2DRestraint):
1226  for d in r.dataset.parents:
1227  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1228  r.number_raw_micrographs = d.number
1229  if isinstance(d.location,
1230  IMP.pmi1.metadata.FileLocation):
1231  d.location.content_type = \
1232  ihm.location.InputFileLocation.content_type
1233 
1234  def flush(self):
1235  self.finalize()
1236  ihm.dumper.write(self.fh, [self.system])
1237 
1238  def finalize(self):
1239  """Do any final processing on the class hierarchy.
1240  After calling this method, the `system` member (an instance
1241  of `ihm.System`) completely reproduces the PMI modeling, and
1242  can be written out to an mmCIF file with `ihm.dumper.write`,
1243  and/or modified using the ihm API."""
1244  self._add_restraint_model_fits()
1245 
1246  self._add_em2d_raw_micrographs()
1247 
1248  # Add metadata to ihm.System
1249  self.system.software.extend(m for m in self._metadata
1250  if isinstance(m, ihm.Software))
1251  self.system.citations.extend(m for m in self._metadata
1252  if isinstance(m, ihm.Citation))
1253  self.system.locations.extend(m for m in self._metadata
1254  if isinstance(m, ihm.location.FileLocation))
1255 
1256  # Point all locations to repos, if applicable
1257  all_repos = [m for m in self._metadata
1258  if isinstance(m, ihm.location.Repository)]
1259  self.system.update_locations_in_repositories(all_repos)
1260 
1261  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1262  chain, hier, representation=None):
1263  if self._is_excluded(name, start, end):
1264  return
1265  if representation is None:
1266  representation = self.default_representation
1267  asym = self.asym_units[name]
1268  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1269  hier, asym)
1270  self.all_representations.add_fragment(state, representation, p)
1271  self.all_starting_models.add_pdb_fragment(p)
1272 
1273  def add_bead_element(self, state, name, start, end, num, hier,
1274  representation=None):
1275  if self._is_excluded(name, start, end):
1276  return
1277  if representation is None:
1278  representation = self.default_representation
1279  asym = self.asym_units[name]
1280  pmi_offset = asym.entity.pmi_offset
1281  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1282  num, hier, asym)
1283  self.all_representations.add_fragment(state, representation, b)
1284 
1285  def get_cross_link_group(self, pmi_restraint):
1286  r = _CrossLinkRestraint(pmi_restraint)
1287  self.system.restraints.append(r)
1288  self._add_restraint_dataset(r) # so that all-dataset group works
1289  return r
1290 
1291  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1292  if c1 not in self._all_components or c2 not in self._all_components:
1293  # Crosslink refers to a component we didn't model
1294  # As a quick hack, just ignore it.
1295  # todo: need to add an entity for this anyway (so will need the
1296  # sequence, and add to struct_assembly)
1297  return None
1298  e1 = self.entities[c1]
1299  e2 = self.entities[c2]
1300  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1301  residue2=e2.pmi_residue(r2))
1302  rsr.experimental_cross_links.append([xl])
1303  return xl
1304 
1305  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1306  psi_p, rsr):
1307  # Map p1/p2 to asym units
1308  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1309  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1310 
1311  if _get_by_residue(p1) and _get_by_residue(p2):
1312  cls = _ResidueCrossLink
1313  else:
1314  cls = _FeatureCrossLink
1315  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1316  restrain_all=True)
1317  # Needed to look up fits later
1318  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1319  rsr.cross_links.append(xl)
1320 
1321  def add_replica_exchange(self, state, rex):
1322  # todo: allow for metadata to say how many replicas were used in the
1323  # actual experiment, and how many independent runs were carried out
1324  # (use these as multipliers to get the correct total number of
1325  # output models)
1326  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1327  state)
1328 
1329  def _add_simple_dynamics(self, num_models_end, method):
1330  # Always assumed that we're dealing with the last state
1331  state = self._last_state
1332  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1333  method), state)
1334 
1335  def _add_protocol(self):
1336  # Always assumed that we're dealing with the last state
1337  state = self._last_state
1338  self.all_protocols.add_protocol(state)
1339 
1340  def _add_dataset(self, dataset):
1341  return self.all_datasets.add(self._last_state, dataset)
1342 
1343  def _add_restraint_dataset(self, restraint):
1344  return self.all_datasets.add_restraint(self._last_state, restraint)
1345 
1346  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1347  # Always assumed that we're dealing with the last state
1348  state = self._last_state
1349  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1350  self.all_protocols.add_postproc(pp, state)
1351  return pp
1352 
1353  def _add_no_postprocessing(self, num_models):
1354  # Always assumed that we're dealing with the last state
1355  state = self._last_state
1356  pp = ihm.analysis.EmptyStep()
1357  pp.num_models_begin = pp.num_models_end = num_models
1358  self.all_protocols.add_postproc(pp, state)
1359  return pp
1360 
1361  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1362  num_models_deposited, localization_densities,
1363  ensemble_file):
1364  """Add an ensemble generated by ad hoc methods (not using PMI).
1365  This is currently only used by the Nup84 system."""
1366  # Always assumed that we're dealing with the last state
1367  state = self._last_state
1368  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1369  state.add_model_group(group)
1370  if ensemble_file:
1371  # Deprecated Location class does not state input vs output
1372  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1373  ensemble_file.content_type = \
1374  ihm.location.OutputFileLocation.content_type
1375  self.system.locations.append(ensemble_file)
1376  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1377  ensemble_file)
1378  self.system.ensembles.append(e)
1379  for c in state.all_modeled_components:
1380  den = localization_densities.get(c, None)
1381  if den:
1382  # Deprecated Location class does not state input vs output
1383  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1384  den.content_type = \
1385  ihm.location.OutputFileLocation.content_type
1386  e.load_localization_density(state, c, self.asym_units[c], den)
1387  return e
1388 
1389  def set_ensemble_file(self, i, location):
1390  """Point a previously-created ensemble to an 'all-models' file.
1391  This could be a trajectory such as DCD, an RMF, or a multimodel
1392  PDB file."""
1393  # Deprecated Location class does not state input vs output
1394  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1395  location.content_type = ihm.location.OutputFileLocation.content_type
1396  self.system.locations.append(location)
1397  # Ensure that we point to an ensemble related to the current state
1398  ind = i + self._state_ensemble_offset
1399  self.system.ensembles[ind].file = location
1400 
1401  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1402  # todo: add prefilter as an additional postprocess step (complication:
1403  # we don't know how many models it filtered)
1404  # todo: postpone rmsf/density localization extraction until after
1405  # relevant methods are called (currently it is done from the
1406  # constructor)
1407  protocol = self.all_protocols.get_last_protocol(state)
1408  num_models = protocol.steps[-1].num_models_end
1409  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1410  self.all_protocols.add_postproc(pp, state)
1411  for i in range(rex._number_of_clusters):
1412  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1413  'cluster %d' % (i + 1)))
1414  state.add_model_group(group)
1415  # todo: make # of models to deposit configurable somewhere
1416  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1417  self.system.ensembles.append(e)
1418  # Add localization density info if available
1419  for fname, stuple in sorted(density_custom_ranges.items()):
1420  e.load_localization_density(state, fname, stuple,
1421  self.asym_units)
1422  for stats in e.load_all_models(self, state):
1423  m = self.add_model(group)
1424  # Since we currently only deposit 1 model, it is the
1425  # best scoring one
1426  m.name = 'Best scoring model'
1427  m.stats = stats
1428  # Add RMSF info if available
1429  for c in state.all_modeled_components:
1430  e.load_rmsf(m, c)
1431 
1432  def _get_subassembly(self, comps, name, description):
1433  """Get an Assembly consisting of the given components.
1434  `compdict` is a dictionary of the components to add, where keys
1435  are the component names and values are the sequence ranges (or
1436  None to use all residues in the component)."""
1437  asyms = []
1438  for comp, seqrng in comps.items():
1439  a = self.asym_units[comp]
1440  asyms.append(a if seqrng is None else a(*seqrng))
1441 
1442  a = ihm.Assembly(asyms, name=name, description=description)
1443  return a
1444 
1445  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1446  details):
1447  """Add a basic FoXS fit. This is largely intended for use from the
1448  NPC application."""
1449  assembly = self._get_subassembly({comp:seqrange},
1450  name="SAXS subassembly",
1451  description="All components that fit SAXS data")
1452  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1453  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1454  multi_state=False, radius_of_gyration=rg, details=details)
1455  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1456  self.system.restraints.append(r)
1457  self._add_restraint_dataset(r) # so that all-dataset group works
1458 
1459  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1460  image_resolution, projection_number,
1461  micrographs_number):
1462  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1463  image_resolution, projection_number,
1464  micrographs_number)
1465  self.system.restraints.append(r)
1466  self._add_restraint_dataset(r) # so that all-dataset group works
1467 
1468  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1469  # todo: need to set allow_duplicates on this dataset?
1470  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1471  self.system.restraints.append(r)
1472  self._add_restraint_dataset(r) # so that all-dataset group works
1473 
1474  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1475  sigma, pmi_restraint):
1476  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1477  r = _GeometricRestraint(self, state, pmi_restraint, self._xy_plane,
1478  asym.get_feature(ps),
1479  ihm.restraint.LowerUpperBoundDistanceRestraint(
1480  lower_bound, upper_bound),
1481  sigma)
1482  self.system.restraints.append(r)
1483  self._add_restraint_dataset(r) # so that all-dataset group works
1484 
1485  def _get_membrane(self, tor_R, tor_r, tor_th):
1486  """Get an object representing a half-torus membrane"""
1487  if not hasattr(self, '_seen_membranes'):
1488  self._seen_membranes = {}
1489  # If a membrane already exists with all dimensions within 0.01 of
1490  # this one, reuse it
1491  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1492  if membrane_id not in self._seen_membranes:
1493  m = ihm.geometry.HalfTorus(center=self._center_origin,
1494  transformation=self._identity_transform,
1495  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1496  inner=True, name='Membrane')
1497  self._seen_membranes[membrane_id] = m
1498  return self._seen_membranes[membrane_id]
1499 
1500  def add_membrane_surface_location_restraint(
1501  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1502  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1503  r = _GeometricRestraint(self, state, pmi_restraint,
1504  self._get_membrane(tor_R, tor_r, tor_th),
1505  asym.get_feature(ps),
1506  ihm.restraint.UpperBoundDistanceRestraint(0.),
1507  sigma)
1508  self.system.restraints.append(r)
1509  self._add_restraint_dataset(r) # so that all-dataset group works
1510 
1511  def add_model(self, group, assembly=None, representation=None):
1512  state = self._last_state
1513  if representation is None:
1514  representation = self.default_representation
1515  protocol = self.all_protocols.get_last_protocol(state)
1516  m = _Model(state.prot, self, protocol,
1517  assembly if assembly else state.modeled_assembly,
1518  representation)
1519  group.append(m)
1520  return m
1521 
1522  def _update_locations(self, filelocs):
1523  """Update FileLocation to point to a parent repository, if any"""
1524  all_repos = [m for m in self._metadata
1525  if isinstance(m, ihm.location.Repository)]
1526  for fileloc in filelocs:
1527  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1528 
1529  _metadata = property(lambda self:
1530  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1109
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1238
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1060
A single asymmetric unit in the system.
Definition: /mmcif.py:1036
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1119
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1022
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:690
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1389
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1026
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1044
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1166
3D rotation class.
Definition: Rotation3D.h:46
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:134
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1009
VectorD< 3 > Vector3D
Definition: VectorD.h:395
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1048
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...