IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/22
The Integrative Modeling Platform
mmcif/util.py
1 """@namespace IMP.mmcif.util
2  @brief Utility functions for IMP.mmcif.
3 """
4 
5 import ihm.location
6 import ihm.dumper
7 import IMP.mmcif.data
9 import IMP.rmf
10 import IMP.atom
11 import RMF
12 import ihm.format
13 import ihm.representation
14 import string
15 import itertools
16 import json
17 import sys
18 from IMP.mmcif.data import _get_all_state_provenance
19 
20 
21 class _ChainIDs:
22  """Map indices to multi-character chain IDs.
23  We label the first 26 chains A-Z, then we move to two-letter
24  chain IDs: AA through AZ, then BA through BZ, through to ZZ.
25  This continues with longer chain IDs."""
26  def __getitem__(self, ind):
27  chars = string.ascii_uppercase
28  lc = len(chars)
29  ids = []
30  while ind >= lc:
31  ids.append(chars[ind % lc])
32  ind = ind // lc - 1
33  ids.append(chars[ind])
34  return "".join(reversed(ids))
35 
36 
37 class Writer:
38  """Convert one or more IMP Models and/or RMF frames to mmCIF
39  or BinaryCIF.
40 
41  Models can be added by calling `add_model`. The final output
42  is collected in a python-ihm `ihm.System` object, as the `system`
43  attribute, and can be written out using the python-ihm API or
44  with the convenience `write` method.
45 
46  This class is still in development and does not yet convert all
47  IMP data to IHM mmCIF/BinaryCIF.
48  """
49 
50  def __init__(self):
51  self.system = ihm.System()
52  self._state_by_name = {}
53  self._entities = IMP.mmcif.data._EntityMapper(self.system)
54  self._components = IMP.mmcif.data._ComponentMapper(self.system)
55  self._software = IMP.mmcif.data._AllSoftware(self.system)
56  self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
57  self._datasets = IMP.mmcif.data._Datasets(self.system)
58  self._model_assemblies = IMP.mmcif.data._ModelAssemblies(self.system)
59  self._representations = IMP.mmcif.data._Representations(self.system)
60  self._protocols = IMP.mmcif.data._Protocols(self.system)
61  self._restraints = IMP.mmcif.restraint._AllRestraints(
62  self.system, self._components)
63 
64  def add_model(self, hiers, restraints, name=None, states=None,
65  ensembles=None):
66  """Add information to the system from an IMP Model.
67  Multiple IHM Models (coordinate sets) may be created (one per
68  state per hierarchy). All IHM models in a state are given the
69  same name and are added to the given IHM Ensemble for that state
70  (or placed in a new Ensemble if not given). Only coordinates
71  for the given state names (or all states, if not given) are added.
72  The IHM Ensembles containing the new models are returned."""
73  if ensembles is None:
74  ensembles = {}
75  if self.system.title is None and len(hiers) > 0:
76  self.system.title = hiers[0].get_name()
77  ihm_models = []
78  for state_obj, state_hier, top_hier in self._get_states(hiers, states):
79  e, model = self._add_hierarchy(
80  state_hier, top_hier, state_obj, name,
81  ensembles.get(state_obj.name))
82  ihm_models.append(model)
83  ensembles[state_obj.name] = e
84  self._add_restraints(restraints, ihm_models)
85  return ensembles
86 
87  def _add_restraints(self, restraints, ihm_models):
88  all_rsr = []
89  for r in restraints:
90  all_rsr.extend(self._handle_restraint(r, ihm_models))
91  # IMP provenance doesn't currently record which restraints were
92  # used in modeling, so assume all of them were
93  dsg = self._datasets.add_group(
94  [r.dataset for r in all_rsr if r.dataset is not None],
95  "All datasets used in modeling")
96  for m in ihm_models:
97  if not m.protocol:
98  continue
99  for step in m.protocol.steps:
100  step.dataset_group = dsg
101  for analysis in m.protocol.analyses:
102  for step in analysis.steps:
103  step.dataset_group = dsg
104 
105  def _handle_restraint(self, r, ihm_models):
106  num_cif_r = 0
107  for cif_r in self._restraints.handle(r, ihm_models):
108  num_cif_r += 1
109  yield cif_r
110  if num_cif_r == 0:
111  try:
112  rs = IMP.RestraintSet.get_from(r)
113  except ValueError:
114  rs = None
115  if rs:
116  for child in rs.restraints:
117  for r in self._handle_restraint(child, ihm_models):
118  yield r
119 
120  def _ensure_unique_molecule_names(self, chains):
121  """Make sure that the input IMP molecule names are unique.
122  We assume that a given molecule name maps to an IHM asym,
123  so we need these names to be unique. Rename molecules if
124  necessary."""
125  def _get_unique_name(old_name, seen_names):
126  for copy_num in itertools.count(1):
127  name = old_name + " copy %d" % copy_num
128  if name not in seen_names:
129  return name
130 
131  seen_names = set()
132  for c in chains:
134  if mol and mol.get_name():
135  name = mol.get_name()
136  if name in seen_names:
137  old_name = name
138  name = _get_unique_name(old_name, seen_names)
139  mol.set_name(name)
140  print("Duplicate molecule name detected - reassigning "
141  "from %r to %r" % (old_name, name))
142  seen_names.add(name)
143 
144  def _ensure_unique_chain_ids(self, chains):
145  """Make sure that IMP chain IDs are unique, since they are mapped to
146  IHM asym IDs, which need to be unique. Reassign them alphabetically
147  if duplicates are found."""
148  chain_ids = [c.get_id() for c in chains]
149  if len(set(chain_ids)) < len(chain_ids):
150  for chain, cid in zip(chains, _ChainIDs()):
151  chain.set_id(cid)
152  print("Duplicate chain IDs detected - reassigning from %s to %s"
153  % (chain_ids, [c.get_id() for c in chains]))
154 
155  def _add_hierarchy(self, h, top_h, state, name, ensemble):
156  if ensemble is None:
157  mg = ihm.model.ModelGroup()
158  state.append(mg)
159  ensemble = ihm.model.Ensemble(model_group=mg, num_models=0)
160  self.system.ensembles.append(ensemble)
161  chains = [IMP.atom.Chain(c)
162  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
163  self._ensure_unique_molecule_names(chains)
164  self._ensure_unique_chain_ids(chains)
165  if len(chains) == 0:
166  raise ValueError("No chains found in %s" % h)
167  asyms = []
168  ch = IMP.mmcif.data._CoordinateHandler(self.system, self._datasets)
169  for chain in chains:
170  ps = ch.get_structure_particles(chain)
171  comp = self._add_chain(chain, ch.get_residue_sequence(ps))
172  asyms.append(comp.asym_unit)
173  ch.add_chain(ps, comp.asym_unit)
174  representation = self._representations.add(ch._representation)
175  assembly = self._model_assemblies.add(asyms)
176  self._add_hierarchy_ensemble_info(h, top_h, ensemble)
177  self._software.add_hierarchy(h, top_h)
178  protocol = self._protocols._add_hierarchy(h, top_h, assembly,
179  self._software)
180  self._external_files.add_hierarchy(h, top_h)
181  model = ihm.model.Model(assembly=assembly, protocol=protocol,
182  representation=representation, name=name)
183  model._atoms = ch._atoms
184  model._spheres = ch._spheres
185  ensemble.model_group.append(model)
186  ensemble.num_models += 1
187  return ensemble, model
188 
189  def _add_hierarchy_ensemble_info(self, h, top_h, ensemble):
190  """Add ensemble-specific information from the given hierarchy"""
191  for prov in reversed(list(_get_all_state_provenance(
192  h, top_h, types=[IMP.core.ClusterProvenance]))):
193  ensemble.num_models = prov.get_number_of_members()
194  ensemble.precision = prov.get_precision()
195  ensemble.name = prov.get_name()
196  d = prov.get_density()
197  if d and d.endswith('.json'):
198  with open(d) as fh:
199  j = json.load(fh)
200  self._parse_json_density(j, d, ensemble)
201 
202  def _parse_json_density(self, j, json_fname, ensemble):
203  """Add information from cluster density JSON file"""
204  if j.get('producer') and j['producer'].get('name') == 'IMP.sampcon':
205  self._parse_sampcon(j, json_fname, ensemble)
206 
207  def _parse_sampcon(self, j, json_fname, ensemble):
208  """Add information from IMP.sampcon-generated JSON file"""
209  ranges = j['density_custom_ranges']
210  for cluster in j['clusters']:
211  if cluster['name'] == ensemble.name:
212  for range_name, mrc in cluster['density'].items():
213  sel_tuple = ranges[range_name]
214  if len(sel_tuple) > 1:
215  raise ValueError("Cannot handle sel tuple")
216  asym = self._parse_sel_tuple(sel_tuple[0])
217  # Path is relative to that of the JSON file
218  # With Python 2, convert mrc from unicode to str
219  full_mrc = IMP.get_relative_path(json_fname, str(mrc))
220  density = ihm.model.LocalizationDensity(
221  file=ihm.location.OutputFileLocation(
222  path=full_mrc, details='Localization density'),
223  asym_unit=asym)
224  ensemble.densities.append(density)
225 
226  def _parse_sel_tuple(self, t):
227  """Convert a PMI-style selection tuple into an IHM object"""
228  asym_map = {}
229  for asym in self.system.asym_units:
230  asym_map[asym.details] = asym
231  if isinstance(t, str):
232  return asym_map[t]
233  elif isinstance(t, (list, tuple)) and len(t) == 3:
234  return asym_map[t[2]](t[0], t[1])
235  else:
236  raise TypeError("Cannot handle selection tuple: %s" % t)
237 
238  def _get_states(self, hiers, states):
239  def get_state_by_name(name):
240  if states is not None and name not in states:
241  return None
242  if name not in self._state_by_name:
243  self._add_state(ihm.model.State(name=name))
244  return self._state_by_name[name]
245 
246  for h in hiers:
247  state_hiers = IMP.atom.get_by_type(h, IMP.atom.STATE_TYPE)
248  for state_hier in state_hiers:
249  state_obj = get_state_by_name(state_hier.get_name())
250  if state_obj is not None:
251  yield state_obj, state_hier, h
252  # If no state nodes, treat everything as in a single unnamed state
253  if len(state_hiers) == 0:
254  state_obj = get_state_by_name(None)
255  if state_obj is not None:
256  yield state_obj, h, None
257 
258  def _add_state(self, state):
259  if not self.system.state_groups:
260  self.system.state_groups.append(ihm.model.StateGroup())
261  self.system.state_groups[-1].append(state)
262  self._state_by_name[state.name] = state
263 
264  def _add_chain(self, chain, seq_from_res):
265  entity, offset = self._entities.add(chain, seq_from_res)
266  component = self._components.add(chain, entity, offset)
267  return component
268 
269  def add_rmf(self, filename, name=None, frame=0, states=None,
270  ensembles=None):
271  """Add information to the system from a single frame in an RMF file."""
272  m = IMP.Model()
273  rh = RMF.open_rmf_file_read_only(filename)
274  hiers = IMP.rmf.create_hierarchies(rh, m)
275  restraints = IMP.rmf.create_restraints(rh, m)
276  if frame != 0:
277  IMP.rmf.load_frame(rh, RMF.FrameID(frame))
278  return self.add_model(hiers, restraints, name=name, states=states,
279  ensembles=ensembles)
280 
281  def write(self, filename):
282  """Write out the IHM system to a named mmCIF or BinaryCIF file."""
283  if filename.endswith('.bcif'):
284  with open(filename, 'wb') as fh:
285  ihm.dumper.write(fh, [self.system], format='BCIF')
286  else:
287  with open(filename, 'w') as fh:
288  ihm.dumper.write(fh, [self.system])
289 
290  def report(self, fh=sys.stdout):
291  """Use python-ihm to print a summary report of the IHM system."""
292  self.system.report(fh)
def add_rmf
Add information to the system from a single frame in an RMF file.
Definition: mmcif/util.py:269
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def report
Use python-ihm to print a summary report of the IHM system.
Definition: mmcif/util.py:290
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
Definition: data.py:44
Convert one or more IMP Models and/or RMF frames to mmCIF or BinaryCIF.
Definition: mmcif/util.py:37
Classes to represent data structures used in mmCIF.
Definition: data.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
def write
Write out the IHM system to a named mmCIF or BinaryCIF file.
Definition: mmcif/util.py:281
def add_model
Add information to the system from an IMP Model.
Definition: mmcif/util.py:64
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
Track creation of a system fragment from clustering.
Definition: provenance.h:426
Store info for a chain of a protein.
Definition: Chain.h:61
Functionality for loading, creating, manipulating and scoring atomic structures.
Support for the RMF file format for storing hierarchical molecular data and markup.