IMP logo
IMP Reference Guide  2.10.1
The Integrative Modeling Platform
/mmcif.py
1 """@namespace IMP.pmi1.mmcif
2  @brief Support for the mmCIF file format.
3 
4  IMP has basic support for writing out files in mmCIF format, for
5  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
6  mmCIF files are currently generated by creating an
7  IMP.pmi1.mmcif.ProtocolOutput class, and attaching it to an
8  IMP.pmi1.representation.Representation object, after which any
9  generated models and metadata are collected and output as mmCIF.
10 """
11 
12 from __future__ import print_function
13 import copy
14 import IMP.core
15 import IMP.algebra
16 import IMP.atom
17 import IMP.em
18 import IMP.isd
20 import IMP.pmi1.tools
21 from IMP.pmi1.tools import OrderedDict
22 import IMP.pmi1.output
23 import IMP.pmi1.metadata
24 import IMP.mmcif.data
25 import re
26 import ast
27 import sys
28 import os
29 import textwrap
30 import weakref
31 import operator
32 import itertools
33 import ihm.location
34 import ihm.dataset
35 import ihm.dumper
36 import ihm.metadata
37 import ihm.startmodel
38 import ihm.model
39 import ihm.protocol
40 import ihm.analysis
41 import ihm.representation
42 import ihm.geometry
43 
44 def _assign_id(obj, seen_objs, obj_by_id):
45  """Assign a unique ID to obj, and track all ids in obj_by_id."""
46  if obj not in seen_objs:
47  if not hasattr(obj, 'id'):
48  obj_by_id.append(obj)
49  obj.id = len(obj_by_id)
50  seen_objs[obj] = obj.id
51  else:
52  obj.id = seen_objs[obj]
53 
54 
55 def _get_by_residue(p):
56  """Determine whether the given particle represents a specific residue
57  or a more coarse-grained object."""
59 
60 
61 class _ComponentMapper(object):
62  """Map a Particle to a component name"""
63  def __init__(self, prot):
64  self.o = IMP.pmi1.output.Output()
65  self.prot = prot
66  self.name = 'cif-output'
67  self.o.dictionary_pdbs[self.name] = self.prot
68  self.o._init_dictchain(self.name, self.prot, multichar_chain=True)
69 
70  def __getitem__(self, p):
71  protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
72  return protname
73 
74 
75 class _AsymMapper(object):
76  """Map a Particle to an asym_unit"""
77  def __init__(self, simo, prot):
78  self.simo = simo
79  self._cm = _ComponentMapper(prot)
80 
81  def __getitem__(self, p):
82  protname = self._cm[p]
83  return self.simo.asym_units[protname]
84 
85  def get_feature(self, ps):
86  """Get an ihm.restraint.Feature that covers the given particles"""
87  # todo: handle things other than residues
88  rngs = []
89  for p in ps:
90  asym = self[p]
91  # todo: handle overlapping or adjoining ranges
93  rind = IMP.atom.Residue(p).get_index()
94  rngs.append(asym(rind, rind))
96  # PMI Fragments always contain contiguous residues
97  rinds = IMP.atom.Fragment(p).get_residue_indexes()
98  rngs.append(asym(rinds[0], rinds[-1]))
99  else:
100  raise ValueError("Unsupported particle type %s" % str(p))
101  return ihm.restraint.ResidueFeature(rngs)
102 
103 
104 class _AllSoftware(object):
105  def __init__(self, system):
106  self.system = system
107  self.modeller_used = self.phyre2_used = False
108  self.system.software.extend([
109  ihm.Software(
110  name="Integrative Modeling Platform (IMP)",
111  version=IMP.__version__,
112  classification="integrative model building",
113  description="integrative model building",
114  location='https://integrativemodeling.org'),
115  ihm.Software(
116  name="IMP PMI module",
117  version=IMP.pmi1.__version__,
118  classification="integrative model building",
119  description="integrative model building",
120  location='https://integrativemodeling.org')])
121 
122  def set_modeller_used(self, version, date):
123  if self.modeller_used:
124  return
125  self.modeller_used = True
126  self.system.software.append(ihm.Software(
127  name='MODELLER', classification='comparative modeling',
128  description='Comparative modeling by satisfaction '
129  'of spatial restraints, build ' + date,
130  location='https://salilab.org/modeller/',
131  version=version))
132 
133  def set_phyre2_used(self):
134  if self.phyre2_used:
135  return
136  self.phyre2_used = True
137  self.system.software.append(ihm.Software(
138  name='Phyre2', classification='protein homology modeling',
139  description='Protein Homology/analogY Recognition '
140  'Engine V 2.0',
141  version='2.0',
142  location='http://www.sbg.bio.ic.ac.uk/~phyre2/'))
143 
144 def _get_fragment_is_rigid(fragment):
145  """Determine whether a fragment is modeled rigidly"""
146  leaves = IMP.atom.get_leaves(fragment.hier)
147  # Assume all leaves are set up as rigid/flexible in the same way
148  return IMP.core.RigidMember.get_is_setup(leaves[0])
149 
150 class _PDBFragment(ihm.representation.ResidueSegment):
151  """Record details about part of a PDB file used as input
152  for a component."""
153  def __init__(self, state, component, start, end, pdb_offset,
154  pdbname, chain, hier, asym_unit):
155  # start, end are PMI residue indexes (not IHM)
156  super(_PDBFragment, self).__init__(
157  asym_unit=asym_unit.pmi_range(start, end),
158  rigid=None, primitive='sphere')
159  self.component, self.start, self.end, self.offset, self.pdbname \
160  = component, start, end, pdb_offset, pdbname
161  self.state, self.chain, self.hier = state, chain, hier
164  self.starting_hier = IMP.atom.read_pdb(pdbname, state.model, sel)
165 
166  rigid = property(lambda self: _get_fragment_is_rigid(self),
167  lambda self, val: None)
168 
169  def combine(self, other):
170  pass
171 
172 
173 class _BeadsFragment(ihm.representation.FeatureSegment):
174  """Record details about beads used to represent part of a component."""
175  chain = None
176  def __init__(self, state, component, start, end, count, hier, asym_unit):
177  super(_BeadsFragment, self).__init__(asym_unit=asym_unit(start, end),
178  rigid=None, primitive='sphere', count=count)
179  self.state, self.component, self.hier \
180  = state, component, hier
181 
182  rigid = property(lambda self: _get_fragment_is_rigid(self),
183  lambda self, val: None)
184 
185  def combine(self, other):
186  # todo: don't combine if one fragment is rigid and the other flexible
187  if type(other) == type(self) and \
188  other.asym_unit.seq_id_range[0] == self.asym_unit.seq_id_range[1] + 1:
189  self.asym_unit.seq_id_range = (self.asym_unit.seq_id_range[0],
190  other.asym_unit.seq_id_range[1])
191  self.count += other.count
192  return True
193 
194 
195 class _AllModelRepresentations(object):
196  def __init__(self, simo):
197  self.simo = simo
198  # dict of fragments, ordered by representation, then component name,
199  # then state
200  self.fragments = OrderedDict()
201  self._all_representations = {}
202 
203  def copy_component(self, state, name, original, asym_unit):
204  """Copy all representation for `original` in `state` to `name`"""
205  def copy_frag(f):
206  newf = copy.copy(f)
207  newf.asym_unit = asym_unit(*f.asym_unit.seq_id_range)
208  return newf
209  for rep in self.fragments:
210  if original in self.fragments[rep]:
211  if name not in self.fragments[rep]:
212  self.fragments[rep][name] = OrderedDict()
213  self.fragments[rep][name][state] = [copy_frag(f)
214  for f in self.fragments[rep][original][state]]
215  # Assume representation for a component is the same in all
216  # states, so only write out the first one
217  first_state = list(self.fragments[rep][name].keys())[0]
218  if state is first_state:
219  representation = self._all_representations[rep]
220  representation.extend(self.fragments[rep][name][state])
221 
222  def add_fragment(self, state, representation, fragment):
223  """Add a model fragment."""
224  comp = fragment.component
225  id_rep = id(representation)
226  self._all_representations[id_rep] = representation
227  if id_rep not in self.fragments:
228  self.fragments[id_rep] = OrderedDict()
229  if comp not in self.fragments[id_rep]:
230  self.fragments[id_rep][comp] = OrderedDict()
231  if state not in self.fragments[id_rep][comp]:
232  self.fragments[id_rep][comp][state] = []
233  fragments = self.fragments[id_rep][comp][state]
234  if len(fragments) == 0 or not fragments[-1].combine(fragment):
235  fragments.append(fragment)
236  # Assume representation for a component is the same in all states,
237  # so only write out the first one
238  first_state = list(self.fragments[id_rep][comp].keys())[0]
239  if state is first_state:
240  representation.append(fragment)
241 
242 
243 class _AllDatasets(object):
244  """Track all datasets generated by PMI and add them to the ihm.System"""
245  def __init__(self, system):
246  self.system = system
247  self._datasets = []
248  self._datasets_by_state = {}
249  self._restraints_by_state = {}
250 
251  def get_all_group(self, state):
252  """Get a DatasetGroup encompassing all datasets so far in this state"""
253  # Note that if a restraint dataset is replaced after this is called,
254  # the group will still contain the old dataset - mark dataset as read
255  # only?
256  g = ihm.dataset.DatasetGroup(self._datasets_by_state.get(state, [])
257  + [r.dataset for r in self._restraints_by_state.get(state, [])
258  if r.dataset])
259  return g
260 
261  def add(self, state, dataset):
262  """Add a new dataset."""
263  self._datasets.append(dataset)
264  if state not in self._datasets_by_state:
265  self._datasets_by_state[state] = []
266  self._datasets_by_state[state].append(dataset)
267  # Make sure that the dataset ends up in the mmCIF file even if nothing
268  # refers to it
269  self.system.orphan_datasets.append(dataset)
270  return dataset
271 
272  def add_restraint(self, state, restraint):
273  """Add the dataset for a restraint"""
274  if state not in self._restraints_by_state:
275  self._restraints_by_state[state] = []
276  self._restraints_by_state[state].append(restraint)
277 
278 
279 class _CrossLinkRestraint(ihm.restraint.CrossLinkRestraint):
280  """Restrain to a set of cross links"""
281 
282  assembly = None
283  _label_map = {'wtDSS':'DSS', 'scDSS':'DSS', 'scEDC':'EDC'}
284 
285  def __init__(self, pmi_restraint):
286  self.pmi_restraint = pmi_restraint
287  label = self.pmi_restraint.label
288  # Map commonly-used subtypes to more standard labels
289  label = self._label_map.get(label, label)
290  self.label = label
291  super(_CrossLinkRestraint, self).__init__(
292  dataset=self.pmi_restraint.dataset,
293  linker_type=label)
294 
295  def _set_psi_sigma(self, model):
296  old_values = []
297  # Do nothing if given a different state than the restraint applies to
298  if model.m != self.pmi_restraint.m:
299  return
300  for resolution in self.pmi_restraint.sigma_dictionary:
301  statname = 'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, self.label)
302  if model.stats and statname in model.stats:
303  sigma = float(model.stats[statname])
304  p = self.pmi_restraint.sigma_dictionary[resolution][0]
305  old_values.append((p, p.get_scale()))
306  p.set_scale(sigma)
307  for psiindex in self.pmi_restraint.psi_dictionary:
308  statname = 'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, self.label)
309  if model.stats and statname in model.stats:
310  psi = float(model.stats[statname])
311  p = self.pmi_restraint.psi_dictionary[psiindex][0]
312  old_values.append((p, p.get_scale()))
313  p.set_scale(psi)
314  # If the same particle is set multiple times, make sure we get the
315  # original value last
316  return list(reversed(old_values))
317 
318  def add_fits_from_model_statfile(self, model):
319  # Set psi/sigma for all particles
320  old_values = self._set_psi_sigma(model)
321  # If no stats were collected, we can't show the fit
322  if not old_values:
323  return
324  for xl in self.cross_links:
325  # Get current psi/sigma particle value for each XL
326  xl.fits[model] = ihm.restraint.CrossLinkFit(
327  psi=xl.psi, sigma1=xl.sigma1, sigma2=xl.sigma2)
328  # Restore original psi/sigma
329  for p, scale in old_values:
330  p.set_scale(scale)
331 
332  # Have our dataset point to that in the original PMI restraint
333  def __set_dataset(self, val):
334  self.pmi_restraint.dataset = val
335  dataset = property(lambda self: self.pmi_restraint.dataset,
336  __set_dataset)
337 
338 
339 def get_asym_mapper_for_state(simo, state, asym_map):
340  asym = asym_map.get(state, None)
341  if asym is None:
342  asym = _AsymMapper(simo, state.prot)
343  asym_map[state] = asym
344  return asym
345 
346 class _PMICrossLink(object):
347  # Query PMI particles to get psi and sigma values at time of use, not
348  # time of construction (since they may be set after creating the restraint)
349  psi = property(lambda self: self.psi_p.get_scale(),
350  lambda self, val: None)
351  sigma1 = property(lambda self: self.sigma1_p.get_scale(),
352  lambda self, val: None)
353  sigma2 = property(lambda self: self.sigma2_p.get_scale(),
354  lambda self, val: None)
355 
356 
357 class _ResidueCrossLink(ihm.restraint.ResidueCrossLink, _PMICrossLink):
358  pass
359 
360 
361 class _FeatureCrossLink(ihm.restraint.FeatureCrossLink, _PMICrossLink):
362  pass
363 
364 
365 class _EM2DRestraint(ihm.restraint.EM2DRestraint):
366  def __init__(self, state, pmi_restraint, image_number, resolution,
367  pixel_size, image_resolution, projection_number,
368  micrographs_number):
369  self.pmi_restraint, self.image_number = pmi_restraint, image_number
370  super(_EM2DRestraint, self).__init__(
371  dataset=pmi_restraint.datasets[image_number],
372  assembly=state.modeled_assembly,
373  segment=False, number_raw_micrographs=micrographs_number,
374  pixel_size_width=pixel_size, pixel_size_height=pixel_size,
375  image_resolution=image_resolution,
376  number_of_projections=projection_number)
377 
378  # Have our dataset point to that in the original PMI restraint
379  def __get_dataset(self):
380  return self.pmi_restraint.datasets[self.image_number]
381  def __set_dataset(self, val):
382  self.pmi_restraint.datasets[self.image_number] = val
383  dataset = property(__get_dataset, __set_dataset)
384 
385  def add_fits_from_model_statfile(self, model):
386  ccc = self._get_cross_correlation(model)
387  transform = self._get_transformation(model)
388  rot = transform.get_rotation()
389  rm = [[e for e in rot.get_rotation_matrix_row(i)] for i in range(3)]
390  self.fits[model] = ihm.restraint.EM2DRestraintFit(
391  cross_correlation_coefficient=ccc,
392  rot_matrix=rm,
393  tr_vector=transform.get_translation())
394 
395  def _get_transformation(self, model):
396  """Get the transformation that places the model on the image"""
397  stats = model.em2d_stats or model.stats
398  prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
399  self.image_number + 1)
400  r = [float(stats[prefix + '_Rotation%d' % i]) for i in range(4)]
401  t = [float(stats[prefix + '_Translation%d' % i])
402  for i in range(3)]
403  # If the model coordinates are transformed, need to back transform
404  # them first
405  inv = model.transform.get_inverse()
407  IMP.algebra.Vector3D(*t)) * inv
408 
409  def _get_cross_correlation(self, model):
410  """Get the cross correlation coefficient between the model projection
411  and the image"""
412  stats = model.em2d_stats or model.stats
413  return float(stats['ElectronMicroscopy2D_%s_Image%d_CCC'
414  % (self.pmi_restraint.label,
415  self.image_number + 1)])
416 
417 
418 class _EM3DRestraint(ihm.restraint.EM3DRestraint):
419 
420  def __init__(self, simo, state, pmi_restraint, target_ps, densities):
421  self.pmi_restraint = pmi_restraint
422  super(_EM3DRestraint, self).__init__(
423  dataset=pmi_restraint.dataset,
424  assembly=self._get_assembly(densities, simo, state),
425  fitting_method='Gaussian mixture models',
426  number_of_gaussians=len(target_ps))
427 
428  # Have our dataset point to that in the original PMI restraint
429  def __set_dataset(self, val):
430  self.pmi_restraint.dataset = val
431  dataset = property(lambda self: self.pmi_restraint.dataset,
432  __set_dataset)
433 
434  def _get_assembly(self, densities, simo, state):
435  """Get the Assembly that this restraint acts on"""
436  cm = _ComponentMapper(state.prot)
437  components = {}
438  for d in densities:
439  components[cm[d]] = None # None == all residues in this component
440  a = simo._get_subassembly(components,
441  name="EM subassembly",
442  description="All components that fit the EM map")
443  return a
444 
445  def add_fits_from_model_statfile(self, model):
446  ccc = self._get_cross_correlation(model)
447  self.fits[model] = ihm.restraint.EM3DRestraintFit(
448  cross_correlation_coefficient=ccc)
449 
450  def _get_cross_correlation(self, model):
451  """Get the cross correlation coefficient between the model
452  and the map"""
453  if model.stats is not None:
454  return float(model.stats['GaussianEMRestraint_%s_CCC'
455  % self.pmi_restraint.label])
456 
457 
458 class _GeometricRestraint(ihm.restraint.GeometricRestraint):
459 
460  def __init__(self, simo, state, pmi_restraint, geometric_object,
461  feature, distance, sigma):
462  self.pmi_restraint = pmi_restraint
463  super(_GeometricRestraint, self).__init__(
464  dataset=pmi_restraint.dataset,
465  geometric_object=geometric_object, feature=feature,
466  distance=distance, harmonic_force_constant = 1. / sigma,
467  restrain_all=True)
468 
469  # Have our dataset point to that in the original PMI restraint
470  def __set_dataset(self, val):
471  self.pmi_restraint.dataset = val
472  dataset = property(lambda self: self.pmi_restraint.dataset,
473  __set_dataset)
474 
475 
476 class _ReplicaExchangeProtocolStep(ihm.protocol.Step):
477  def __init__(self, state, rex):
478  if rex.monte_carlo_sample_objects is not None:
479  method = 'Replica exchange monte carlo'
480  else:
481  method = 'Replica exchange molecular dynamics'
482  super(_ReplicaExchangeProtocolStep, self).__init__(
483  assembly=state.modeled_assembly,
484  dataset_group=None, # filled in by add_step()
485  method=method, name='Sampling',
486  num_models_begin=None, # filled in by add_step()
487  num_models_end=rex.vars["number_of_frames"],
488  multi_scale=True, multi_state=False, ordered=False)
489 
490 
491 class _SimpleProtocolStep(ihm.protocol.Step):
492  def __init__(self, state, num_models_end, method):
493  super(_SimpleProtocolStep, self).__init__(
494  assembly=state.modeled_assembly,
495  dataset_group=None, # filled in by add_step()
496  method=method, name='Sampling',
497  num_models_begin=None, # filled in by add_step()
498  num_models_end=num_models_end,
499  multi_scale=True, multi_state=False, ordered=False)
500 
501 
502 class _Chain(object):
503  """Represent a single chain in a Model"""
504  def __init__(self, pmi_chain_id, asym_unit):
505  self.pmi_chain_id, self.asym_unit = pmi_chain_id, asym_unit
506  self.spheres = []
507  self.atoms = []
508 
509  def add(self, xyz, atom_type, residue_type, residue_index,
510  all_indexes, radius):
511  if atom_type is None:
512  self.spheres.append((xyz, residue_type, residue_index,
513  all_indexes, radius))
514  else:
515  self.atoms.append((xyz, atom_type, residue_type, residue_index,
516  all_indexes, radius))
517  orig_comp = property(lambda self: self.comp)
518 
519 class _TransformedChain(object):
520  """Represent a chain that is a transformed version of another"""
521  def __init__(self, orig_chain, asym_unit, transform):
522  self.orig_chain, self.asym_unit = orig_chain, asym_unit
523  self.transform = transform
524 
525  def __get_spheres(self):
526  for (xyz, residue_type, residue_index, all_indexes,
527  radius) in self.orig_chain.spheres:
528  yield (self.transform * xyz, residue_type, residue_index,
529  all_indexes, radius)
530  spheres = property(__get_spheres)
531 
532  def __get_atoms(self):
533  for (xyz, atom_type, residue_type, residue_index, all_indexes,
534  radius) in self.orig_chain.atoms:
535  yield (self.transform * xyz, atom_type, residue_type, residue_index,
536  all_indexes, radius)
537  atoms = property(__get_atoms)
538 
539  entity = property(lambda self: self.orig_chain.entity)
540  orig_comp = property(lambda self: self.orig_chain.comp)
541 
542 class _Excluder(object):
543  def __init__(self, component, simo):
544  self._seqranges = simo._exclude_coords.get(component, [])
545 
546  def is_excluded(self, indexes):
547  """Return True iff the given sequence range is excluded."""
548  for seqrange in self._seqranges:
549  if indexes[0] >= seqrange[0] and indexes[-1] <= seqrange[1]:
550  return True
551 
552 
553 class _Model(ihm.model.Model):
554  def __init__(self, prot, simo, protocol, assembly, representation):
555  super(_Model, self).__init__(assembly=assembly, protocol=protocol,
556  representation=representation)
557  self.simo = weakref.proxy(simo)
558  # Transformation from IMP coordinates into mmCIF coordinate system.
559  # Normally we pass through coordinates unchanged, but in some cases
560  # we may want to translate them (e.g. Nup84, where the deposited PDB
561  # files are all centered; we want the mmCIF files to match)
563  self.em2d_stats = None
564  self.stats = None
565  # True iff restraints act on this model
566  self._is_restrained = True
567  o = self.output = IMP.pmi1.output.Output(atomistic=True)
568  name = 'cif-output'
569  self.m = prot.get_model()
570  o.dictionary_pdbs[name] = prot
571  o._init_dictchain(name, prot, multichar_chain=True)
572  (particle_infos_for_pdb,
573  self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
574  self.geometric_center = IMP.algebra.Vector3D(*self.geometric_center)
575  self._make_spheres_atoms(particle_infos_for_pdb, o, name, simo)
576  self.rmsf = {}
577 
578  def all_chains(self, simo):
579  """Yield all chains, including transformed ones"""
580  chain_for_comp = {}
581  for c in self.chains:
582  yield c
583  chain_for_comp[c.comp] = c
584  for tc in simo._transformed_components:
585  orig_chain = chain_for_comp.get(tc.original, None)
586  if orig_chain:
587  asym = simo.asym_units[tc.name]
588  c = _TransformedChain(orig_chain, asym, tc.transform)
589  c.comp = tc.name
590  yield c
591 
592  def _make_spheres_atoms(self, particle_infos_for_pdb, o, name, simo):
593  entity_for_chain = {}
594  comp_for_chain = {}
595  correct_asym = {}
596  for protname, chain_id in o.dictchain[name].items():
597  entity_for_chain[chain_id] = simo.entities[protname]
598  comp_for_chain[chain_id] = protname
599  # When doing multi-state modeling, the chain ID returned here
600  # (assigned sequentially) might not be correct (states may have
601  # gaps in the chain IDs). Map it to the correct asym unit.
602  correct_asym[chain_id] = simo.asym_units[protname]
603 
604  # Gather by chain ID (should be sorted by chain ID already)
605  self.chains = []
606  chain = None
607  excluder = None
608 
609  for (xyz, atom_type, residue_type, chain_id, residue_index,
610  all_indexes, radius) in particle_infos_for_pdb:
611  if chain is None or chain.pmi_chain_id != chain_id:
612  chain = _Chain(chain_id, correct_asym[chain_id])
613  chain.entity = entity_for_chain[chain_id]
614  chain.comp = comp_for_chain[chain_id]
615  self.chains.append(chain)
616  excluder = _Excluder(chain.comp, simo)
617  if not excluder.is_excluded(all_indexes if all_indexes
618  else [residue_index]):
619  chain.add(xyz, atom_type, residue_type, residue_index,
620  all_indexes, radius)
621 
622  def parse_rmsf_file(self, fname, component):
623  self.rmsf[component] = rmsf = {}
624  with open(fname) as fh:
625  for line in fh:
626  resnum, blocknum, val = line.split()
627  rmsf[int(resnum)] = (int(blocknum), float(val))
628 
629  def get_rmsf(self, component, indexes):
630  """Get the RMSF value for the given residue indexes."""
631  if not self.rmsf:
632  return None
633  rmsf = self.rmsf[component]
634  blocknums = dict.fromkeys(rmsf[ind][0] for ind in indexes)
635  if len(blocknums) != 1:
636  raise ValueError("Residue indexes %s aren't all in the same block"
637  % str(indexes))
638  return rmsf[indexes[0]][1]
639 
640  def get_atoms(self):
641  for chain in self.all_chains(self.simo):
642  pmi_offset = chain.asym_unit.entity.pmi_offset
643  for atom in chain.atoms:
644  (xyz, atom_type, residue_type, residue_index,
645  all_indexes, radius) = atom
646  pt = self.transform * xyz
647  yield ihm.model.Atom(asym_unit=chain.asym_unit,
648  seq_id=residue_index - pmi_offset,
649  atom_id=atom_type.get_string(),
650  type_symbol=None, # todo: get element
651  x=pt[0], y=pt[1], z=pt[2])
652 
653  def get_spheres(self):
654  for chain in self.all_chains(self.simo):
655  pmi_offset = chain.asym_unit.entity.pmi_offset
656  for sphere in chain.spheres:
657  (xyz, residue_type, residue_index,
658  all_indexes, radius) = sphere
659  if all_indexes is None:
660  all_indexes = (residue_index,)
661  pt = self.transform * xyz
662  yield ihm.model.Sphere(asym_unit=chain.asym_unit,
663  seq_id_range=(all_indexes[0] - pmi_offset,
664  all_indexes[-1] - pmi_offset),
665  x=pt[0], y=pt[1], z=pt[2], radius=radius,
666  rmsf=self.get_rmsf(chain.orig_comp, all_indexes))
667 
668 
669 class _AllProtocols(object):
670  def __init__(self, simo):
671  self.simo = weakref.proxy(simo)
672  # Lists of Protocols by state
673  self.protocols = OrderedDict()
674 
675  def add_protocol(self, state):
676  """Add a new Protocol"""
677  if state not in self.protocols:
678  self.protocols[state] = []
679  p = ihm.protocol.Protocol()
680  self.simo.system.orphan_protocols.append(p)
681  self.protocols[state].append(p)
682 
683  def add_step(self, step, state):
684  """Add a ProtocolStep to the last Protocol of the given State"""
685  if state not in self.protocols:
686  self.add_protocol(state)
687  protocol = self.get_last_protocol(state)
688  if len(protocol.steps) == 0:
689  step.num_models_begin = 0
690  else:
691  step.num_models_begin = protocol.steps[-1].num_models_end
692  protocol.steps.append(step)
693  step.id = len(protocol.steps)
694  # Assume that protocol uses all currently-defined datasets
695  step.dataset_group = self.simo.all_datasets.get_all_group(state)
696 
697  def add_postproc(self, step, state):
698  """Add a postprocessing step to the last protocol"""
699  protocol = self.get_last_protocol(state)
700  if len(protocol.analyses) == 0:
701  protocol.analyses.append(ihm.analysis.Analysis())
702  protocol.analyses[-1].steps.append(step)
703 
704  def get_last_protocol(self, state):
705  """Return the most recently-added _Protocol"""
706  return self.protocols[state][-1]
707 
708 
709 class _AllStartingModels(object):
710  def __init__(self, simo):
711  self.simo = simo
712  # dict of starting models (entire PDB files), collected from fragments,
713  # ordered by component name and state
714  self.models = OrderedDict()
715  self.output = IMP.pmi1.output.Output()
716 
717  def add_pdb_fragment(self, fragment):
718  """Add a starting model PDB fragment."""
719  comp = fragment.component
720  state = fragment.state
721  if comp not in self.models:
722  self.models[comp] = OrderedDict()
723  if state not in self.models[comp]:
724  self.models[comp][state] = []
725  models = self.models[comp][state]
726  if len(models) == 0 \
727  or models[-1].fragments[0].pdbname != fragment.pdbname:
728  model = self._add_model(fragment)
729  models.append(model)
730  else:
731  # Break circular ref between fragment and model
732  models[-1].fragments.append(weakref.proxy(fragment))
733  # Update residue range to cover all fragments
734  pmi_offset = models[-1].asym_unit.entity.pmi_offset
735  sid_begin = min(fragment.start + fragment.offset - pmi_offset,
736  models[-1].asym_unit.seq_id_range[0])
737  sid_end = max(fragment.end + fragment.offset - pmi_offset,
738  models[-1].asym_unit.seq_id_range[1])
739  models[-1].asym_unit = fragment.asym_unit.asym(sid_begin, sid_end)
740  fragment.starting_model = models[-1]
741 
742  def _add_model(self, f):
743  parser = ihm.metadata.PDBParser()
744  r = parser.parse_file(f.pdbname)
745 
746  self.simo._add_dataset(r['dataset'])
747  # We only want the templates that model the starting model chain
748  templates = r['templates'].get(f.chain, [])
749  for t in templates:
750  if t.alignment_file:
751  self.simo.system.locations.append(t.alignment_file)
752  if t.dataset:
753  self.simo._add_dataset(t.dataset)
754  pmi_offset = f.asym_unit.entity.pmi_offset
755  m = _StartingModel(
756  asym_unit=f.asym_unit.asym.pmi_range(f.start + f.offset,
757  f.end + f.offset),
758  dataset=r['dataset'], asym_id=f.chain,
759  templates=templates, offset=f.offset + pmi_offset,
760  metadata=r['metadata'],
761  software=r['software'][0] if r['software'] else None,
762  script_file=r['script'])
763  m.fragments = [weakref.proxy(f)]
764  return m
765 
766 
767 class _StartingModel(ihm.startmodel.StartingModel):
768  def get_seq_dif(self):
769  return self._seq_dif
770 
771  def get_atoms(self):
772  pmi_offset = self.asym_unit.entity.pmi_offset
773  mh = IMP.mmcif.data._StartingModelAtomHandler(self.templates,
774  self.asym_unit)
775  for f in self.fragments:
776  sel = IMP.atom.Selection(f.starting_hier,
777  residue_indexes=list(range(f.start - f.offset,
778  f.end - f.offset + 1)))
779  for a in mh.get_ihm_atoms(sel.get_selected_particles(),
780  f.offset - pmi_offset):
781  yield a
782  self._seq_dif = mh._seq_dif
783 
784 
785 class _ReplicaExchangeAnalysisPostProcess(ihm.analysis.ClusterStep):
786  """Post processing using AnalysisReplicaExchange0 macro"""
787 
788  def __init__(self, rex, num_models_begin):
789  self.rex = rex
790  num_models_end = 0
791  for fname in self.get_all_stat_files():
792  with open(fname) as fh:
793  num_models_end += len(fh.readlines())
794  super(_ReplicaExchangeAnalysisPostProcess, self).__init__(
795  feature='RMSD', num_models_begin=num_models_begin,
796  num_models_end=num_models_end)
797 
798  def get_stat_file(self, cluster_num):
799  return os.path.join(self.rex._outputdir, "cluster.%d" % cluster_num,
800  'stat.out')
801 
802  def get_all_stat_files(self):
803  for i in range(self.rex._number_of_clusters):
804  yield self.get_stat_file(i)
805 
806 
807 class _ReplicaExchangeAnalysisEnsemble(ihm.model.Ensemble):
808  """Ensemble generated using AnalysisReplicaExchange0 macro"""
809 
810  num_models_deposited = None # Override base class property
811 
812  def __init__(self, pp, cluster_num, model_group, num_deposit):
813  with open(pp.get_stat_file(cluster_num)) as fh:
814  num_models = len(fh.readlines())
815  super(_ReplicaExchangeAnalysisEnsemble, self).__init__(
816  num_models=num_models,
817  model_group=model_group, post_process=pp,
818  clustering_feature=pp.feature,
819  name=model_group.name)
820  self.cluster_num = cluster_num
821  self.num_models_deposited = num_deposit
822 
823  def get_rmsf_file(self, component):
824  return os.path.join(self.post_process.rex._outputdir,
825  'cluster.%d' % self.cluster_num,
826  'rmsf.%s.dat' % component)
827 
828  def load_rmsf(self, model, component):
829  fname = self.get_rmsf_file(component)
830  if os.path.exists(fname):
831  model.parse_rmsf_file(fname, component)
832 
833  def get_localization_density_file(self, fname):
834  return os.path.join(self.post_process.rex._outputdir,
835  'cluster.%d' % self.cluster_num,
836  '%s.mrc' % fname)
837 
838  def load_localization_density(self, state, fname, select_tuple, asym_units):
839  fullpath = self.get_localization_density_file(fname)
840  if os.path.exists(fullpath):
841  details = "Localization density for %s %s" \
842  % (fname, self.model_group.name)
843  local_file = ihm.location.OutputFileLocation(fullpath,
844  details=details)
845  for s in select_tuple:
846  if isinstance(s, tuple) and len(s) == 3:
847  asym = asym_units[s[2]].pmi_range(s[0], s[1])
848  else:
849  asym = asym_units[s]
850  den = ihm.model.LocalizationDensity(file=local_file,
851  asym_unit=asym)
852  self.densities.append(den)
853 
854  def load_all_models(self, simo, state):
855  stat_fname = self.post_process.get_stat_file(self.cluster_num)
856  model_num = 0
857  with open(stat_fname) as fh:
858  stats = ast.literal_eval(fh.readline())
859  # Correct path
860  rmf_file = os.path.join(os.path.dirname(stat_fname),
861  "%d.rmf3" % model_num)
862  for c in state.all_modeled_components:
863  # todo: this only works with PMI 1
864  state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
865  force_rigid_update=True)
866  # todo: fill in other data from stat file, e.g. crosslink phi/psi
867  yield stats
868  model_num += 1
869  if model_num >= self.num_models_deposited:
870  return
871 
872  # todo: also support dRMS precision
873  def _get_precision(self):
874  precfile = os.path.join(self.post_process.rex._outputdir,
875  "precision.%d.%d.out" % (self.cluster_num,
876  self.cluster_num))
877  if not os.path.exists(precfile):
878  return ihm.unknown
879  # Fail if the precision.x.x.out file doesn't match the cluster
880  r = re.compile('All .*/cluster.%d/ average centroid distance ([\d\.]+)'
881  % self.cluster_num)
882  with open(precfile) as fh:
883  for line in fh:
884  m = r.match(line)
885  if m:
886  return float(m.group(1))
887 
888  precision = property(lambda self: self._get_precision(),
889  lambda self, val: None)
890 
891 class _SimpleEnsemble(ihm.model.Ensemble):
892  """Simple manually-created ensemble"""
893 
894  num_models_deposited = None # Override base class property
895 
896  def __init__(self, pp, model_group, num_models, drmsd,
897  num_models_deposited, ensemble_file):
898  super(_SimpleEnsemble, self).__init__(
899  model_group=model_group, post_process=pp, num_models=num_models,
900  file=ensemble_file, precision=drmsd, name=model_group.name,
901  clustering_feature='dRMSD')
902  self.num_models_deposited = num_models_deposited
903 
904  def load_localization_density(self, state, component, asym, local_file):
905  den = ihm.model.LocalizationDensity(file=local_file, asym_unit=asym)
906  self.densities.append(den)
907 
908 
909 class _EntityMapper(dict):
910  """Handle mapping from IMP components to CIF entities.
911  Multiple components may map to the same entity if they share sequence."""
912  def __init__(self, system):
913  super(_EntityMapper, self).__init__()
914  self._sequence_dict = {}
915  self._entities = []
916  self.system = system
917 
918  def add(self, component_name, sequence, offset):
919  def entity_seq(sequence):
920  # Map X to UNK
921  if 'X' in sequence:
922  return ['UNK' if s == 'X' else s for s in sequence]
923  else:
924  return sequence
925  if sequence not in self._sequence_dict:
926  # Use the name of the first component, stripped of any copy number,
927  # as the description of the entity
928  d = component_name.split("@")[0].split(".")[0]
929  entity = Entity(entity_seq(sequence), description=d,
930  pmi_offset=offset)
931  self.system.entities.append(entity)
932  self._sequence_dict[sequence] = entity
933  self[component_name] = self._sequence_dict[sequence]
934 
935 
936 class _TransformedComponent(object):
937  def __init__(self, name, original, transform):
938  self.name, self.original, self.transform = name, original, transform
939 
940 
941 class _SimpleRef(object):
942  """Class with similar interface to weakref.ref, but keeps a strong ref"""
943  def __init__(self, ref):
944  self.ref = ref
945  def __call__(self):
946  return self.ref
947 
948 
949 class _State(ihm.model.State):
950  """Representation of a single state in the system."""
951 
952  def __init__(self, pmi_object, po):
953  # Point to the PMI object for this state. Use a weak reference
954  # since the state object typically points to us too, so we need
955  # to break the reference cycle. In PMI1 this will be a
956  # Representation object; in PMI2 it is the PMI2 State object itself.
957  self._pmi_object = weakref.proxy(pmi_object)
958  if hasattr(pmi_object, 'state'):
959  # Need a strong ref to pmi_object.state to prevent it from being
960  # cleaned up when doing PMI1 multistate modeling
961  self._pmi_state = _SimpleRef(pmi_object.state)
962  else:
963  self._pmi_state = weakref.ref(pmi_object)
964  # Preserve PMI state name
965  old_name = self.name
966  super(_State, self).__init__(experiment_type='Fraction of bulk')
967  self.name = old_name
968 
969  # The assembly of all components modeled by IMP in this state.
970  # This may be smaller than the complete assembly.
971  self.modeled_assembly = ihm.Assembly(
972  name="Modeled assembly",
973  description=self.get_postfixed_name(
974  "All components modeled by IMP"))
975  po.system.orphan_assemblies.append(self.modeled_assembly)
976 
977  self.all_modeled_components = []
978 
979  def __hash__(self):
980  return hash(self._pmi_state())
981  def __eq__(self, other):
982  return self._pmi_state() == other._pmi_state()
983 
984  def add_model_group(self, group):
985  self.append(group)
986  return group
987 
988  def get_prefixed_name(self, name):
989  """Prefix the given name with the state name, if available."""
990  if self.short_name:
991  return self.short_name + ' ' + name
992  else:
993  # Can't use capitalize() since that un-capitalizes everything
994  # but the first letter
995  return name[0].upper() + name[1:] if name else ''
996 
997  def get_postfixed_name(self, name):
998  """Postfix the given name with the state name, if available."""
999  if self.short_name:
1000  return "%s in state %s" % (name, self.short_name)
1001  else:
1002  return name
1003 
1004  short_name = property(lambda self: self._pmi_state().short_name)
1005  long_name = property(lambda self: self._pmi_state().long_name)
1006  def __get_name(self):
1007  return self._pmi_state().long_name
1008  def __set_name(self, val):
1009  self._pmi_state().long_name = val
1010  name = property(__get_name, __set_name)
1011 
1012 
1013 class Entity(ihm.Entity):
1014  """A single entity in the system.
1015  This functions identically to the base ihm.Entity class, but it
1016  allows identifying residues by either the IHM numbering scheme
1017  (seq_id, which is always contiguous starting at 1) or by the PMI
1018  scheme (which is similar, but may not start at 1 if the sequence in
1019  the FASTA file has one or more N-terminal gaps"""
1020  def __init__(self, sequence, pmi_offset, *args, **keys):
1021  # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
1022  # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
1023  self.pmi_offset = pmi_offset
1024  super(Entity, self).__init__(sequence, *args, **keys)
1025 
1026  def pmi_residue(self, res_id):
1027  """Return a single IHM residue indexed using PMI numbering"""
1028  return self.residue(res_id - self.pmi_offset)
1029 
1030  def pmi_range(self, res_id_begin, res_id_end):
1031  """Return a range of IHM residues indexed using PMI numbering"""
1032  off = self.pmi_offset
1033  return self(res_id_begin - off, res_id_end - off)
1034 
1035 
1036 class AsymUnit(ihm.AsymUnit):
1037  """A single asymmetric unit in the system.
1038  This functions identically to the base ihm.AsymUnit class, but it
1039  allows identifying residues by either the IHM numbering scheme
1040  (seq_id, which is always contiguous starting at 1) or by the PMI
1041  scheme (which is similar, but may not start at 1 if the sequence in
1042  the FASTA file has one or more N-terminal gaps"""
1043 
1044  def __init__(self, entity, *args, **keys):
1045  super(AsymUnit, self).__init__(
1046  entity, auth_seq_id_map=entity.pmi_offset, *args, **keys)
1047 
1048  def pmi_residue(self, res_id):
1049  """Return a single IHM residue indexed using PMI numbering"""
1050  return self.residue(res_id - self.entity.pmi_offset)
1051 
1052  def pmi_range(self, res_id_begin, res_id_end):
1053  """Return a range of IHM residues indexed using PMI numbering"""
1054  off = self.entity.pmi_offset
1055  return self(res_id_begin - off, res_id_end - off)
1056 
1057 
1059  """Class to encode a modeling protocol as mmCIF.
1060 
1061  IMP has basic support for writing out files in mmCIF format, for
1062  deposition in [PDB-Dev](https://pdb-dev.wwpdb.org/).
1063  After creating an instance of this class, attach it to an
1064  IMP.pmi1.representation.Representation object. After this, any
1065  generated models and metadata are automatically collected and
1066  output as mmCIF.
1067  """
1068  def __init__(self, fh):
1069  # Ultimately, collect data in an ihm.System object
1070  self.system = ihm.System()
1071  self._state_group = ihm.model.StateGroup()
1072  self.system.state_groups.append(self._state_group)
1073 
1074  self.fh = fh
1075  self._state_ensemble_offset = 0
1076  self._each_metadata = [] # list of metadata for each representation
1077  self._file_datasets = []
1078  self._main_script = os.path.abspath(sys.argv[0])
1079 
1080  # Point to the main modeling script
1081  loc = ihm.location.WorkflowFileLocation(path=self._main_script,
1082  details="The main integrative modeling script")
1083  self.system.locations.append(loc)
1084 
1085  self._states = {}
1086  self.__asym_states = {}
1087  self._working_directory = os.getcwd()
1088  self.default_representation = self.create_representation(
1089  "Default representation")
1090  self.entities = _EntityMapper(self.system)
1091  # Mapping from component names to ihm.AsymUnit
1092  self.asym_units = {}
1093  self._all_components = {}
1094  self.all_modeled_components = []
1095  self._transformed_components = []
1096  self.sequence_dict = {}
1097 
1098  # Common geometric objects used in PMI systems
1099  self._xy_plane = ihm.geometry.XYPlane()
1100  self._xz_plane = ihm.geometry.XZPlane()
1101  self._z_axis = ihm.geometry.ZAxis()
1102  self._center_origin = ihm.geometry.Center(0,0,0)
1103  self._identity_transform = ihm.geometry.Transformation.identity()
1104 
1105  # Coordinates to exclude
1106  self._exclude_coords = {}
1107 
1108  self.all_representations = _AllModelRepresentations(self)
1109  self.all_protocols = _AllProtocols(self)
1110  self.all_datasets = _AllDatasets(self.system)
1111  self.all_starting_models = _AllStartingModels(self)
1112 
1113  self.all_software = _AllSoftware(self.system)
1114 
1115  def create_representation(self, name):
1116  """Create a new Representation and return it. This can be
1117  passed to add_model(), add_bead_element() or add_pdb_element()."""
1118  r = ihm.representation.Representation()
1119  self.system.orphan_representations.append(r)
1120  return r
1121 
1122  def exclude_coordinates(self, component, seqrange):
1123  """Don't record coordinates for the given domain.
1124  Coordinates for the given domain (specified by a component name
1125  and a 2-element tuple giving the start and end residue numbers)
1126  will be excluded from the mmCIF file. This can be used to exclude
1127  parts of the structure that weren't well resolved in modeling.
1128  Any bead or residue that lies wholly within this range will be
1129  excluded. Multiple ranges for a given component can be excluded
1130  by calling this method multiple times."""
1131  if component not in self._exclude_coords:
1132  self._exclude_coords[component] = []
1133  self._exclude_coords[component].append(seqrange)
1134 
1135  def _is_excluded(self, component, start, end):
1136  """Return True iff this chunk of sequence should be excluded"""
1137  for seqrange in self._exclude_coords.get(component, ()):
1138  if start >= seqrange[0] and end <= seqrange[1]:
1139  return True
1140 
1141  def _add_state(self, state):
1142  """Create a new state and return a pointer to it."""
1143  self._state_ensemble_offset = len(self.system.ensembles)
1144  s = _State(state, self)
1145  self._state_group.append(s)
1146  self._last_state = s
1147  return s
1148 
1149  def get_file_dataset(self, fname):
1150  for d in self._file_datasets:
1151  fd = d.get(os.path.abspath(fname), None)
1152  if fd:
1153  return fd
1154 
1155  def _get_chain_for_component(self, name, output):
1156  """Get the chain ID for a component, if any."""
1157  # todo: handle multiple copies
1158  if name in self.asym_units:
1159  return self.asym_units[name]._id
1160  else:
1161  # A non-modeled component doesn't have a chain ID
1162  return None
1163 
1164  def _get_assembly_comps(self, assembly):
1165  """Get the names of the components in the given assembly"""
1166  # component name = asym_unit.details
1167  comps = {}
1168  for ca in assembly:
1169  comps[ca.details] = None
1170  return comps
1171 
1172  def create_transformed_component(self, state, name, original, transform):
1173  """Make a new component that's a transformed copy of another.
1174  All representation for the existing component is copied to the
1175  new one."""
1176  assembly_comps = self._get_assembly_comps(state.modeled_assembly)
1177  if name in assembly_comps:
1178  raise ValueError("Component %s already exists" % name)
1179  elif original not in assembly_comps:
1180  raise ValueError("Original component %s does not exist" % original)
1181  self.create_component(state, name, True)
1182  self.add_component_sequence(state, name, self.sequence_dict[original])
1183  self._transformed_components.append(_TransformedComponent(
1184  name, original, transform))
1185  self.all_representations.copy_component(state, name, original,
1186  self.asym_units[name])
1187 
1188  def create_component(self, state, name, modeled):
1189  new_comp = name not in self._all_components
1190  self._all_components[name] = None
1191  if modeled:
1192  state.all_modeled_components.append(name)
1193  if new_comp:
1194  self.asym_units[name] = None # assign asym once we get sequence
1195  self.all_modeled_components.append(name)
1196 
1197  def add_component_sequence(self, state, name, seq):
1198  def get_offset(seq):
1199  # Count length of gaps at start of sequence
1200  for i in range(len(seq)):
1201  if seq[i] != '-':
1202  return seq[i:], i
1203  seq, offset = get_offset(seq)
1204  if name in self.sequence_dict:
1205  if self.sequence_dict[name] != seq:
1206  raise ValueError("Sequence mismatch for component %s" % name)
1207  else:
1208  self.sequence_dict[name] = seq
1209  self.entities.add(name, seq, offset)
1210  if name in self.asym_units:
1211  if self.asym_units[name] is None:
1212  # Set up a new asymmetric unit for this component
1213  entity = self.entities[name]
1214  asym = AsymUnit(entity, details=name)
1215  self.system.asym_units.append(asym)
1216  self.asym_units[name] = asym
1217  state.modeled_assembly.append(self.asym_units[name])
1218 
1219  def _add_restraint_model_fits(self):
1220  """Add fits to restraints for all known models"""
1221  for group, m in self.system._all_models():
1222  if m._is_restrained:
1223  for r in self.system.restraints:
1224  if hasattr(r, 'add_fits_from_model_statfile'):
1225  r.add_fits_from_model_statfile(m)
1226 
1227  def _add_em2d_raw_micrographs(self):
1228  """If the deprecated metadata.EMMicrographsDataset class was used,
1229  transfer its data to the EM2D restraint."""
1230  for r in self.system.restraints:
1231  if isinstance(r, _EM2DRestraint):
1232  for d in r.dataset.parents:
1233  if isinstance(d, IMP.pmi1.metadata.EMMicrographsDataset):
1234  r.number_raw_micrographs = d.number
1235  if isinstance(d.location,
1236  IMP.pmi1.metadata.FileLocation):
1237  d.location.content_type = \
1238  ihm.location.InputFileLocation.content_type
1239 
1240  def flush(self):
1241  self.finalize()
1242  ihm.dumper.write(self.fh, [self.system])
1243 
1244  def finalize(self):
1245  """Do any final processing on the class hierarchy.
1246  After calling this method, the `system` member (an instance
1247  of `ihm.System`) completely reproduces the PMI modeling, and
1248  can be written out to an mmCIF file with `ihm.dumper.write`,
1249  and/or modified using the ihm API."""
1250  self._add_restraint_model_fits()
1251 
1252  self._add_em2d_raw_micrographs()
1253 
1254  # Add metadata to ihm.System
1255  self.system.software.extend(m for m in self._metadata
1256  if isinstance(m, ihm.Software))
1257  self.system.citations.extend(m for m in self._metadata
1258  if isinstance(m, ihm.Citation))
1259  self.system.locations.extend(m for m in self._metadata
1260  if isinstance(m, ihm.location.FileLocation))
1261 
1262  # Point all locations to repos, if applicable
1263  all_repos = [m for m in self._metadata
1264  if isinstance(m, ihm.location.Repository)]
1265  self.system.update_locations_in_repositories(all_repos)
1266 
1267  def add_pdb_element(self, state, name, start, end, offset, pdbname,
1268  chain, hier, representation=None):
1269  if self._is_excluded(name, start, end):
1270  return
1271  if representation is None:
1272  representation = self.default_representation
1273  asym = self.asym_units[name]
1274  p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
1275  hier, asym)
1276  self.all_representations.add_fragment(state, representation, p)
1277  self.all_starting_models.add_pdb_fragment(p)
1278 
1279  def add_bead_element(self, state, name, start, end, num, hier,
1280  representation=None):
1281  if self._is_excluded(name, start, end):
1282  return
1283  if representation is None:
1284  representation = self.default_representation
1285  asym = self.asym_units[name]
1286  pmi_offset = asym.entity.pmi_offset
1287  b = _BeadsFragment(state, name, start - pmi_offset, end - pmi_offset,
1288  num, hier, asym)
1289  self.all_representations.add_fragment(state, representation, b)
1290 
1291  def get_cross_link_group(self, pmi_restraint):
1292  r = _CrossLinkRestraint(pmi_restraint)
1293  self.system.restraints.append(r)
1294  self._add_restraint_dataset(r) # so that all-dataset group works
1295  return r
1296 
1297  def add_experimental_cross_link(self, r1, c1, r2, c2, rsr):
1298  if c1 not in self._all_components or c2 not in self._all_components:
1299  # Crosslink refers to a component we didn't model
1300  # As a quick hack, just ignore it.
1301  # todo: need to add an entity for this anyway (so will need the
1302  # sequence, and add to struct_assembly)
1303  return None
1304  e1 = self.entities[c1]
1305  e2 = self.entities[c2]
1306  xl = ihm.restraint.ExperimentalCrossLink(residue1=e1.pmi_residue(r1),
1307  residue2=e2.pmi_residue(r2))
1308  rsr.experimental_cross_links.append([xl])
1309  return xl
1310 
1311  def add_cross_link(self, state, ex_xl, p1, p2, length, sigma1_p, sigma2_p,
1312  psi_p, rsr):
1313  # Map p1/p2 to asym units
1314  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1315  d = ihm.restraint.UpperBoundDistanceRestraint(length)
1316 
1317  if _get_by_residue(p1) and _get_by_residue(p2):
1318  cls = _ResidueCrossLink
1319  else:
1320  cls = _FeatureCrossLink
1321  xl = cls(ex_xl, asym1=asym[p1], asym2=asym[p2], distance=d,
1322  restrain_all=True)
1323  # Needed to look up fits later
1324  xl.psi_p, xl.sigma1_p, xl.sigma2_p = psi_p, sigma1_p, sigma2_p
1325  rsr.cross_links.append(xl)
1326 
1327  def add_replica_exchange(self, state, rex):
1328  # todo: allow for metadata to say how many replicas were used in the
1329  # actual experiment, and how many independent runs were carried out
1330  # (use these as multipliers to get the correct total number of
1331  # output models)
1332  self.all_protocols.add_step(_ReplicaExchangeProtocolStep(state, rex),
1333  state)
1334 
1335  def _add_simple_dynamics(self, num_models_end, method):
1336  # Always assumed that we're dealing with the last state
1337  state = self._last_state
1338  self.all_protocols.add_step(_SimpleProtocolStep(state, num_models_end,
1339  method), state)
1340 
1341  def _add_protocol(self):
1342  # Always assumed that we're dealing with the last state
1343  state = self._last_state
1344  self.all_protocols.add_protocol(state)
1345 
1346  def _add_dataset(self, dataset):
1347  return self.all_datasets.add(self._last_state, dataset)
1348 
1349  def _add_restraint_dataset(self, restraint):
1350  return self.all_datasets.add_restraint(self._last_state, restraint)
1351 
1352  def _add_simple_postprocessing(self, num_models_begin, num_models_end):
1353  # Always assumed that we're dealing with the last state
1354  state = self._last_state
1355  pp = ihm.analysis.ClusterStep('RMSD', num_models_begin, num_models_end)
1356  self.all_protocols.add_postproc(pp, state)
1357  return pp
1358 
1359  def _add_no_postprocessing(self, num_models):
1360  # Always assumed that we're dealing with the last state
1361  state = self._last_state
1362  pp = ihm.analysis.EmptyStep()
1363  pp.num_models_begin = pp.num_models_end = num_models
1364  self.all_protocols.add_postproc(pp, state)
1365  return pp
1366 
1367  def _add_simple_ensemble(self, pp, name, num_models, drmsd,
1368  num_models_deposited, localization_densities,
1369  ensemble_file):
1370  """Add an ensemble generated by ad hoc methods (not using PMI).
1371  This is currently only used by the Nup84 system."""
1372  # Always assumed that we're dealing with the last state
1373  state = self._last_state
1374  group = ihm.model.ModelGroup(name=state.get_postfixed_name(name))
1375  state.add_model_group(group)
1376  if ensemble_file:
1377  # Deprecated Location class does not state input vs output
1378  if isinstance(ensemble_file, IMP.pmi1.metadata.FileLocation):
1379  ensemble_file.content_type = \
1380  ihm.location.OutputFileLocation.content_type
1381  self.system.locations.append(ensemble_file)
1382  e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
1383  ensemble_file)
1384  self.system.ensembles.append(e)
1385  for c in state.all_modeled_components:
1386  den = localization_densities.get(c, None)
1387  if den:
1388  # Deprecated Location class does not state input vs output
1389  if isinstance(den, IMP.pmi1.metadata.FileLocation):
1390  den.content_type = \
1391  ihm.location.OutputFileLocation.content_type
1392  e.load_localization_density(state, c, self.asym_units[c], den)
1393  return e
1394 
1395  def set_ensemble_file(self, i, location):
1396  """Point a previously-created ensemble to an 'all-models' file.
1397  This could be a trajectory such as DCD, an RMF, or a multimodel
1398  PDB file."""
1399  # Deprecated Location class does not state input vs output
1400  if isinstance(location, IMP.pmi1.metadata.FileLocation):
1401  location.content_type = ihm.location.OutputFileLocation.content_type
1402  self.system.locations.append(location)
1403  # Ensure that we point to an ensemble related to the current state
1404  ind = i + self._state_ensemble_offset
1405  self.system.ensembles[ind].file = location
1406 
1407  def add_replica_exchange_analysis(self, state, rex, density_custom_ranges):
1408  # todo: add prefilter as an additional postprocess step (complication:
1409  # we don't know how many models it filtered)
1410  # todo: postpone rmsf/density localization extraction until after
1411  # relevant methods are called (currently it is done from the
1412  # constructor)
1413  protocol = self.all_protocols.get_last_protocol(state)
1414  num_models = protocol.steps[-1].num_models_end
1415  pp = _ReplicaExchangeAnalysisPostProcess(rex, num_models)
1416  self.all_protocols.add_postproc(pp, state)
1417  for i in range(rex._number_of_clusters):
1418  group = ihm.model.ModelGroup(name=state.get_prefixed_name(
1419  'cluster %d' % (i + 1)))
1420  state.add_model_group(group)
1421  # todo: make # of models to deposit configurable somewhere
1422  e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
1423  self.system.ensembles.append(e)
1424  # Add localization density info if available
1425  for fname, stuple in sorted(density_custom_ranges.items()):
1426  e.load_localization_density(state, fname, stuple,
1427  self.asym_units)
1428  for stats in e.load_all_models(self, state):
1429  m = self.add_model(group)
1430  # Since we currently only deposit 1 model, it is the
1431  # best scoring one
1432  m.name = 'Best scoring model'
1433  m.stats = stats
1434  # Add RMSF info if available
1435  for c in state.all_modeled_components:
1436  e.load_rmsf(m, c)
1437 
1438  def _get_subassembly(self, comps, name, description):
1439  """Get an Assembly consisting of the given components.
1440  `compdict` is a dictionary of the components to add, where keys
1441  are the component names and values are the sequence ranges (or
1442  None to use all residues in the component)."""
1443  asyms = []
1444  for comp, seqrng in comps.items():
1445  a = self.asym_units[comp]
1446  asyms.append(a if seqrng is None else a(*seqrng))
1447 
1448  a = ihm.Assembly(asyms, name=name, description=description)
1449  return a
1450 
1451  def _add_foxs_restraint(self, model, comp, seqrange, dataset, rg, chi,
1452  details):
1453  """Add a basic FoXS fit. This is largely intended for use from the
1454  NPC application."""
1455  assembly = self._get_subassembly({comp:seqrange},
1456  name="SAXS subassembly",
1457  description="All components that fit SAXS data")
1458  r = ihm.restraint.SASRestraint(dataset, assembly, segment=False,
1459  fitting_method='FoXS', fitting_atom_type='Heavy atoms',
1460  multi_state=False, radius_of_gyration=rg, details=details)
1461  r.fits[model] = ihm.restraint.SASRestraintFit(chi_value=chi)
1462  self.system.restraints.append(r)
1463  self._add_restraint_dataset(r) # so that all-dataset group works
1464 
1465  def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
1466  image_resolution, projection_number,
1467  micrographs_number):
1468  r = _EM2DRestraint(state, r, i, resolution, pixel_size,
1469  image_resolution, projection_number,
1470  micrographs_number)
1471  self.system.restraints.append(r)
1472  self._add_restraint_dataset(r) # so that all-dataset group works
1473 
1474  def add_em3d_restraint(self, state, target_ps, densities, pmi_restraint):
1475  # todo: need to set allow_duplicates on this dataset?
1476  r = _EM3DRestraint(self, state, pmi_restraint, target_ps, densities)
1477  self.system.restraints.append(r)
1478  self._add_restraint_dataset(r) # so that all-dataset group works
1479 
1480  def add_zaxial_restraint(self, state, ps, lower_bound, upper_bound,
1481  sigma, pmi_restraint):
1482  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1483  sigma, pmi_restraint, self._xy_plane)
1484 
1485  def add_yaxial_restraint(self, state, ps, lower_bound, upper_bound,
1486  sigma, pmi_restraint):
1487  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1488  sigma, pmi_restraint, self._xz_plane)
1489 
1490  def add_xyradial_restraint(self, state, ps, lower_bound, upper_bound,
1491  sigma, pmi_restraint):
1492  self._add_geometric_restraint(state, ps, lower_bound, upper_bound,
1493  sigma, pmi_restraint, self._z_axis)
1494 
1495  def _add_geometric_restraint(self, state, ps, lower_bound, upper_bound,
1496  sigma, pmi_restraint, geom):
1497  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1498  r = _GeometricRestraint(self, state, pmi_restraint, geom,
1499  asym.get_feature(ps),
1500  ihm.restraint.LowerUpperBoundDistanceRestraint(
1501  lower_bound, upper_bound),
1502  sigma)
1503  self.system.restraints.append(r)
1504  self._add_restraint_dataset(r) # so that all-dataset group works
1505 
1506  def _get_membrane(self, tor_R, tor_r, tor_th):
1507  """Get an object representing a half-torus membrane"""
1508  if not hasattr(self, '_seen_membranes'):
1509  self._seen_membranes = {}
1510  # If a membrane already exists with all dimensions within 0.01 of
1511  # this one, reuse it
1512  membrane_id = tuple(int(x * 100.) for x in (tor_R, tor_r, tor_th))
1513  if membrane_id not in self._seen_membranes:
1514  m = ihm.geometry.HalfTorus(center=self._center_origin,
1515  transformation=self._identity_transform,
1516  major_radius=tor_R, minor_radius=tor_r, thickness=tor_th,
1517  inner=True, name='Membrane')
1518  self._seen_membranes[membrane_id] = m
1519  return self._seen_membranes[membrane_id]
1520 
1521  def add_membrane_surface_location_restraint(
1522  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1523  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1524  pmi_restraint, ihm.restraint.UpperBoundDistanceRestraint(0.))
1525 
1526  def add_membrane_exclusion_restraint(
1527  self, state, ps, tor_R, tor_r, tor_th, sigma, pmi_restraint):
1528  self._add_membrane_restraint(state, ps, tor_R, tor_r, tor_th, sigma,
1529  pmi_restraint, ihm.restraint.LowerBoundDistanceRestraint(0.))
1530 
1531  def _add_membrane_restraint(self, state, ps, tor_R, tor_r, tor_th,
1532  sigma, pmi_restraint, rsr):
1533  asym = get_asym_mapper_for_state(self, state, self.__asym_states)
1534  r = _GeometricRestraint(self, state, pmi_restraint,
1535  self._get_membrane(tor_R, tor_r, tor_th),
1536  asym.get_feature(ps), rsr, sigma)
1537  self.system.restraints.append(r)
1538  self._add_restraint_dataset(r) # so that all-dataset group works
1539 
1540  def add_model(self, group, assembly=None, representation=None):
1541  state = self._last_state
1542  if representation is None:
1543  representation = self.default_representation
1544  protocol = self.all_protocols.get_last_protocol(state)
1545  m = _Model(state.prot, self, protocol,
1546  assembly if assembly else state.modeled_assembly,
1547  representation)
1548  group.append(m)
1549  return m
1550 
1551  def _update_locations(self, filelocs):
1552  """Update FileLocation to point to a parent repository, if any"""
1553  all_repos = [m for m in self._metadata
1554  if isinstance(m, ihm.location.Repository)]
1555  for fileloc in filelocs:
1556  ihm.location.Repository._update_in_repos(fileloc, all_repos)
1557 
1558  _metadata = property(lambda self:
1559  itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Simple 3D transformation class.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
Classes for attaching metadata to PMI objects.
Definition: pmi1/metadata.py:1
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: atom/Atom.h:241
def create_representation
Create a new Representation and return it.
Definition: /mmcif.py:1115
def finalize
Do any final processing on the class hierarchy.
Definition: /mmcif.py:1244
Class for easy writing of PDBs, RMFs, and stat files.
Definition: /output.py:65
Class to encode a modeling protocol as mmCIF.
Definition: /mmcif.py:1064
A single asymmetric unit in the system.
Definition: /mmcif.py:1040
def exclude_coordinates
Don't record coordinates for the given domain.
Definition: /mmcif.py:1125
Base class for capturing a modeling protocol.
Definition: /output.py:40
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1026
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: rigid_bodies.h:690
void read_pdb(TextInput input, int model, Hierarchy h)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
void add_restraint(RMF::FileHandle fh, Restraint *hs)
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
Miscellaneous utilities.
Definition: /tools.py:1
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
Definition: /mmcif.py:1395
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1030
Basic utilities for handling cryo-electron microscopy 3D density maps.
def pmi_residue
Return a single IHM residue indexed using PMI numbering.
Definition: /mmcif.py:1048
def create_transformed_component
Make a new component that's a transformed copy of another.
Definition: /mmcif.py:1172
3D rotation class.
Definition: Rotation3D.h:46
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Representation of the system.
A decorator for a residue.
Definition: Residue.h:134
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
A single entity in the system.
Definition: /mmcif.py:1013
VectorD< 3 > Vector3D
Definition: VectorD.h:395
def pmi_range
Return a range of IHM residues indexed using PMI numbering.
Definition: /mmcif.py:1052
Classes for writing output files and processing them.
Definition: /output.py:1
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:189
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...