IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
mmcif/util.py
1 """@namespace IMP.mmcif.util
2  @brief Utility functions for IMP.mmcif.
3 """
4 
5 import ihm.location
6 import ihm.dumper
7 import IMP.mmcif.data
9 import IMP.rmf
10 import IMP.atom
11 import RMF
12 import ihm.format
13 import ihm.representation
14 import string
15 import itertools
16 import json
17 import sys
18 from IMP.mmcif.data import _get_all_state_provenance
19 
20 
21 if sys.version_info[0] == 2:
22  # basestring is base for both str and unicode (used by json)
23  _string_type = basestring # noqa: F821
24 else:
25  _string_type = str
26 
27 
28 class _ChainIDs(object):
29  """Map indices to multi-character chain IDs.
30  We label the first 26 chains A-Z, then we move to two-letter
31  chain IDs: AA through AZ, then BA through BZ, through to ZZ.
32  This continues with longer chain IDs."""
33  def __getitem__(self, ind):
34  chars = string.ascii_uppercase
35  lc = len(chars)
36  ids = []
37  while ind >= lc:
38  ids.append(chars[ind % lc])
39  ind = ind // lc - 1
40  ids.append(chars[ind])
41  return "".join(reversed(ids))
42 
43 
44 class Writer(object):
45  """Convert one or more IMP Models and/or RMF frames to mmCIF
46  or BinaryCIF.
47 
48  Models can be added by calling `add_model`. The final output
49  is collected in a python-ihm `ihm.System` object, as the `system`
50  attribute, and can be written out using the python-ihm API or
51  with the convenience `write` method.
52 
53  This class is still in development and does not yet convert all
54  IMP data to IHM mmCIF/BinaryCIF.
55  """
56 
57  def __init__(self):
58  self.system = ihm.System()
59  self._state_by_name = {}
60  self._entities = IMP.mmcif.data._EntityMapper(self.system)
61  self._components = IMP.mmcif.data._ComponentMapper(self.system)
62  self._software = IMP.mmcif.data._AllSoftware(self.system)
63  self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
64  self._datasets = IMP.mmcif.data._Datasets(self.system)
65  self._model_assemblies = IMP.mmcif.data._ModelAssemblies(self.system)
66  self._representations = IMP.mmcif.data._Representations(self.system)
67  self._protocols = IMP.mmcif.data._Protocols(self.system)
68  self._restraints = IMP.mmcif.restraint._AllRestraints(
69  self.system, self._components)
70 
71  def add_model(self, hiers, restraints, name=None, states=None,
72  ensembles=None):
73  """Add information to the system from an IMP Model.
74  Multiple IHM Models (coordinate sets) may be created (one per
75  state per hierarchy). All IHM models in a state are given the
76  same name and are added to the given IHM Ensemble for that state
77  (or placed in a new Ensemble if not given). Only coordinates
78  for the given state names (or all states, if not given) are added.
79  The IHM Ensembles containing the new models are returned."""
80  if ensembles is None:
81  ensembles = {}
82  if self.system.title is None and len(hiers) > 0:
83  self.system.title = hiers[0].get_name()
84  ihm_models = []
85  for state_obj, state_hier, top_hier in self._get_states(hiers, states):
86  e, model = self._add_hierarchy(
87  state_hier, top_hier, state_obj, name,
88  ensembles.get(state_obj.name))
89  ihm_models.append(model)
90  ensembles[state_obj.name] = e
91  self._add_restraints(restraints, ihm_models)
92  return ensembles
93 
94  def _add_restraints(self, restraints, ihm_models):
95  all_rsr = []
96  for r in restraints:
97  all_rsr.extend(self._handle_restraint(r, ihm_models))
98  # IMP provenance doesn't currently record which restraints were
99  # used in modeling, so assume all of them were
100  dsg = self._datasets.add_group(
101  [r.dataset for r in all_rsr if r.dataset is not None],
102  "All datasets used in modeling")
103  for m in ihm_models:
104  if not m.protocol:
105  continue
106  for step in m.protocol.steps:
107  step.dataset_group = dsg
108  for analysis in m.protocol.analyses:
109  for step in analysis.steps:
110  step.dataset_group = dsg
111 
112  def _handle_restraint(self, r, ihm_models):
113  num_cif_r = 0
114  for cif_r in self._restraints.handle(r, ihm_models):
115  num_cif_r += 1
116  yield cif_r
117  if num_cif_r == 0:
118  try:
119  rs = IMP.RestraintSet.get_from(r)
120  except ValueError:
121  rs = None
122  if rs:
123  for child in rs.restraints:
124  for r in self._handle_restraint(child, ihm_models):
125  yield r
126 
127  def _ensure_unique_molecule_names(self, chains):
128  """Make sure that the input IMP molecule names are unique.
129  We assume that a given molecule name maps to an IHM asym,
130  so we need these names to be unique. Rename molecules if
131  necessary."""
132  def _get_unique_name(old_name, seen_names):
133  for copy_num in itertools.count(1):
134  name = old_name + " copy %d" % copy_num
135  if name not in seen_names:
136  return name
137 
138  seen_names = set()
139  for c in chains:
141  if mol and mol.get_name():
142  name = mol.get_name()
143  if name in seen_names:
144  old_name = name
145  name = _get_unique_name(old_name, seen_names)
146  mol.set_name(name)
147  print("Duplicate molecule name detected - reassigning "
148  "from %r to %r" % (old_name, name))
149  seen_names.add(name)
150 
151  def _ensure_unique_chain_ids(self, chains):
152  """Make sure that IMP chain IDs are unique, since they are mapped to
153  IHM asym IDs, which need to be unique. Reassign them alphabetically
154  if duplicates are found."""
155  chain_ids = [c.get_id() for c in chains]
156  if len(set(chain_ids)) < len(chain_ids):
157  for chain, cid in zip(chains, _ChainIDs()):
158  chain.set_id(cid)
159  print("Duplicate chain IDs detected - reassigning from %s to %s"
160  % (chain_ids, [c.get_id() for c in chains]))
161 
162  def _add_hierarchy(self, h, top_h, state, name, ensemble):
163  if ensemble is None:
164  mg = ihm.model.ModelGroup()
165  state.append(mg)
166  ensemble = ihm.model.Ensemble(model_group=mg, num_models=0)
167  self.system.ensembles.append(ensemble)
168  chains = [IMP.atom.Chain(c)
169  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
170  self._ensure_unique_molecule_names(chains)
171  self._ensure_unique_chain_ids(chains)
172  if len(chains) == 0:
173  raise ValueError("No chains found in %s" % h)
174  asyms = []
175  ch = IMP.mmcif.data._CoordinateHandler(self.system, self._datasets)
176  for chain in chains:
177  ps = ch.get_structure_particles(chain)
178  comp = self._add_chain(chain, ch.get_residue_sequence(ps))
179  asyms.append(comp.asym_unit)
180  ch.add_chain(ps, comp.asym_unit)
181  representation = self._representations.add(ch._representation)
182  assembly = self._model_assemblies.add(asyms)
183  self._add_hierarchy_ensemble_info(h, top_h, ensemble)
184  self._software.add_hierarchy(h, top_h)
185  protocol = self._protocols._add_hierarchy(h, top_h, assembly,
186  self._software)
187  self._external_files.add_hierarchy(h, top_h)
188  model = ihm.model.Model(assembly=assembly, protocol=protocol,
189  representation=representation, name=name)
190  model._atoms = ch._atoms
191  model._spheres = ch._spheres
192  ensemble.model_group.append(model)
193  ensemble.num_models += 1
194  return ensemble, model
195 
196  def _add_hierarchy_ensemble_info(self, h, top_h, ensemble):
197  """Add ensemble-specific information from the given hierarchy"""
198  for prov in reversed(list(_get_all_state_provenance(
199  h, top_h, types=[IMP.core.ClusterProvenance]))):
200  ensemble.num_models = prov.get_number_of_members()
201  ensemble.precision = prov.get_precision()
202  ensemble.name = prov.get_name()
203  d = prov.get_density()
204  if d and d.endswith('.json'):
205  with open(d) as fh:
206  j = json.load(fh)
207  self._parse_json_density(j, d, ensemble)
208 
209  def _parse_json_density(self, j, json_fname, ensemble):
210  """Add information from cluster density JSON file"""
211  if j.get('producer') and j['producer'].get('name') == 'IMP.sampcon':
212  self._parse_sampcon(j, json_fname, ensemble)
213 
214  def _parse_sampcon(self, j, json_fname, ensemble):
215  """Add information from IMP.sampcon-generated JSON file"""
216  ranges = j['density_custom_ranges']
217  for cluster in j['clusters']:
218  if cluster['name'] == ensemble.name:
219  for range_name, mrc in cluster['density'].items():
220  sel_tuple = ranges[range_name]
221  if len(sel_tuple) > 1:
222  raise ValueError("Cannot handle sel tuple")
223  asym = self._parse_sel_tuple(sel_tuple[0])
224  # Path is relative to that of the JSON file
225  # With Python 2, convert mrc from unicode to str
226  full_mrc = IMP.get_relative_path(json_fname, str(mrc))
227  density = ihm.model.LocalizationDensity(
228  file=ihm.location.OutputFileLocation(
229  path=full_mrc, details='Localization density'),
230  asym_unit=asym)
231  ensemble.densities.append(density)
232 
233  def _parse_sel_tuple(self, t):
234  """Convert a PMI-style selection tuple into an IHM object"""
235  asym_map = {}
236  for asym in self.system.asym_units:
237  asym_map[asym.details] = asym
238  if isinstance(t, _string_type):
239  return asym_map[t]
240  elif isinstance(t, (list, tuple)) and len(t) == 3:
241  return asym_map[t[2]](t[0], t[1])
242  else:
243  raise TypeError("Cannot handle selection tuple: %s" % t)
244 
245  def _get_states(self, hiers, states):
246  def get_state_by_name(name):
247  if states is not None and name not in states:
248  return None
249  if name not in self._state_by_name:
250  self._add_state(ihm.model.State(name=name))
251  return self._state_by_name[name]
252 
253  for h in hiers:
254  state_hiers = IMP.atom.get_by_type(h, IMP.atom.STATE_TYPE)
255  for state_hier in state_hiers:
256  state_obj = get_state_by_name(state_hier.get_name())
257  if state_obj is not None:
258  yield state_obj, state_hier, h
259  # If no state nodes, treat everything as in a single unnamed state
260  if len(state_hiers) == 0:
261  state_obj = get_state_by_name(None)
262  if state_obj is not None:
263  yield state_obj, h, None
264 
265  def _add_state(self, state):
266  if not self.system.state_groups:
267  self.system.state_groups.append(ihm.model.StateGroup())
268  self.system.state_groups[-1].append(state)
269  self._state_by_name[state.name] = state
270 
271  def _add_chain(self, chain, seq_from_res):
272  entity, offset = self._entities.add(chain, seq_from_res)
273  component = self._components.add(chain, entity, offset)
274  return component
275 
276  def add_rmf(self, filename, name=None, frame=0, states=None,
277  ensembles=None):
278  """Add information to the system from a single frame in an RMF file."""
279  m = IMP.Model()
280  rh = RMF.open_rmf_file_read_only(filename)
281  hiers = IMP.rmf.create_hierarchies(rh, m)
282  restraints = IMP.rmf.create_restraints(rh, m)
283  if frame != 0:
284  IMP.rmf.load_frame(rh, RMF.FrameID(frame))
285  return self.add_model(hiers, restraints, name=name, states=states,
286  ensembles=ensembles)
287 
288  def write(self, filename):
289  """Write out the IHM system to a named mmCIF or BinaryCIF file."""
290  if filename.endswith('.bcif'):
291  with open(filename, 'wb') as fh:
292  ihm.dumper.write(fh, [self.system], format='BCIF')
293  else:
294  with open(filename, 'w') as fh:
295  ihm.dumper.write(fh, [self.system])
296 
297  def report(self, fh=sys.stdout):
298  """Use python-ihm to print a summary report of the IHM system."""
299  self.system.report(fh)
def add_rmf
Add information to the system from a single frame in an RMF file.
Definition: mmcif/util.py:276
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def report
Use python-ihm to print a summary report of the IHM system.
Definition: mmcif/util.py:297
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
Definition: data.py:44
Convert one or more IMP Models and/or RMF frames to mmCIF or BinaryCIF.
Definition: mmcif/util.py:44
Classes to represent data structures used in mmCIF.
Definition: data.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def write
Write out the IHM system to a named mmCIF or BinaryCIF file.
Definition: mmcif/util.py:288
def add_model
Add information to the system from an IMP Model.
Definition: mmcif/util.py:71
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
Track creation of a system fragment from clustering.
Definition: provenance.h:426
Store info for a chain of a protein.
Definition: Chain.h:61
Functionality for loading, creating, manipulating and scoring atomic structures.
Support for the RMF file format for storing hierarchical molecular data and markup.