1 """@namespace IMP.mmcif.util
2 @brief Utility functions for IMP.mmcif.
13 import ihm.representation
21 if sys.version_info[0] == 2:
23 _string_type = basestring
28 class _ChainIDs(object):
29 """Map indices to multi-character chain IDs.
30 We label the first 26 chains A-Z, then we move to two-letter
31 chain IDs: AA through AZ, then BA through BZ, through to ZZ.
32 This continues with longer chain IDs."""
33 def __getitem__(self, ind):
34 chars = string.ascii_uppercase
38 ids.append(chars[ind % lc])
40 ids.append(chars[ind])
41 return "".join(reversed(ids))
45 """Convert one or more IMP Models and/or RMF frames to mmCIF
48 Models can be added by calling `add_model`. The final output
49 is collected in a python-ihm `ihm.System` object, as the `system`
50 attribute, and can be written out using the python-ihm API or
51 with the convenience `write` method.
53 This class is still in development and does not yet convert all
54 IMP data to IHM mmCIF/BinaryCIF.
58 self.system = ihm.System()
59 self._state_by_name = {}
60 self._entities = IMP.mmcif.data._EntityMapper(self.system)
61 self._components = IMP.mmcif.data._ComponentMapper(self.system)
62 self._software = IMP.mmcif.data._AllSoftware(self.system)
63 self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
64 self._datasets = IMP.mmcif.data._Datasets(self.system)
65 self._model_assemblies = IMP.mmcif.data._ModelAssemblies(self.system)
66 self._representations = IMP.mmcif.data._Representations(self.system)
67 self._protocols = IMP.mmcif.data._Protocols(self.system)
68 self._restraints = IMP.mmcif.restraint._AllRestraints(
69 self.system, self._components)
71 def add_model(self, hiers, restraints, name=None, states=None,
73 """Add information to the system from an IMP Model.
74 Multiple IHM Models (coordinate sets) may be created (one per
75 state per hierarchy). All IHM models in a state are given the
76 same name and are added to the given IHM Ensemble for that state
77 (or placed in a new Ensemble if not given). Only coordinates
78 for the given state names (or all states, if not given) are added.
79 The IHM Ensembles containing the new models are returned."""
82 if self.system.title
is None and len(hiers) > 0:
83 self.system.title = hiers[0].get_name()
85 for state_obj, state_hier, top_hier
in self._get_states(hiers, states):
86 e, model = self._add_hierarchy(
87 state_hier, top_hier, state_obj, name,
88 ensembles.get(state_obj.name))
89 ihm_models.append(model)
90 ensembles[state_obj.name] = e
91 self._add_restraints(restraints, ihm_models)
94 def _add_restraints(self, restraints, ihm_models):
97 all_rsr.extend(self._handle_restraint(r, ihm_models))
100 dsg = self._datasets.add_group(
101 [r.dataset
for r
in all_rsr
if r.dataset
is not None],
102 "All datasets used in modeling")
106 for step
in m.protocol.steps:
107 step.dataset_group = dsg
108 for analysis
in m.protocol.analyses:
109 for step
in analysis.steps:
110 step.dataset_group = dsg
112 def _handle_restraint(self, r, ihm_models):
114 for cif_r
in self._restraints.handle(r, ihm_models):
119 rs = IMP.RestraintSet.get_from(r)
123 for child
in rs.restraints:
124 for r
in self._handle_restraint(child, ihm_models):
127 def _ensure_unique_molecule_names(self, chains):
128 """Make sure that the input IMP molecule names are unique.
129 We assume that a given molecule name maps to an IHM asym,
130 so we need these names to be unique. Rename molecules if
132 def _get_unique_name(old_name, seen_names):
133 for copy_num
in itertools.count(1):
134 name = old_name +
" copy %d" % copy_num
135 if name
not in seen_names:
141 if mol
and mol.get_name():
142 name = mol.get_name()
143 if name
in seen_names:
145 name = _get_unique_name(old_name, seen_names)
147 print(
"Duplicate molecule name detected - reassigning "
148 "from %r to %r" % (old_name, name))
151 def _ensure_unique_chain_ids(self, chains):
152 """Make sure that IMP chain IDs are unique, since they are mapped to
153 IHM asym IDs, which need to be unique. Reassign them alphabetically
154 if duplicates are found."""
155 chain_ids = [c.get_id()
for c
in chains]
156 if len(set(chain_ids)) < len(chain_ids):
157 for chain, cid
in zip(chains, _ChainIDs()):
159 print(
"Duplicate chain IDs detected - reassigning from %s to %s"
160 % (chain_ids, [c.get_id()
for c
in chains]))
162 def _add_hierarchy(self, h, top_h, state, name, ensemble):
164 mg = ihm.model.ModelGroup()
166 ensemble = ihm.model.Ensemble(model_group=mg, num_models=0)
167 self.system.ensembles.append(ensemble)
169 for c
in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
170 self._ensure_unique_molecule_names(chains)
171 self._ensure_unique_chain_ids(chains)
173 raise ValueError(
"No chains found in %s" % h)
175 ch = IMP.mmcif.data._CoordinateHandler(self.system, self._datasets)
177 ps = ch.get_structure_particles(chain)
178 comp = self._add_chain(chain, ch.get_residue_sequence(ps))
179 asyms.append(comp.asym_unit)
180 ch.add_chain(ps, comp.asym_unit)
181 representation = self._representations.add(ch._representation)
182 assembly = self._model_assemblies.add(asyms)
183 self._add_hierarchy_ensemble_info(h, top_h, ensemble)
184 self._software.add_hierarchy(h, top_h)
185 protocol = self._protocols._add_hierarchy(h, top_h, assembly,
187 self._external_files.add_hierarchy(h, top_h)
188 model = ihm.model.Model(assembly=assembly, protocol=protocol,
189 representation=representation, name=name)
190 model._atoms = ch._atoms
191 model._spheres = ch._spheres
192 ensemble.model_group.append(model)
193 ensemble.num_models += 1
194 return ensemble, model
196 def _add_hierarchy_ensemble_info(self, h, top_h, ensemble):
197 """Add ensemble-specific information from the given hierarchy"""
198 for prov
in reversed(list(_get_all_state_provenance(
200 ensemble.num_models = prov.get_number_of_members()
201 ensemble.precision = prov.get_precision()
202 ensemble.name = prov.get_name()
203 d = prov.get_density()
204 if d
and d.endswith(
'.json'):
207 self._parse_json_density(j, d, ensemble)
209 def _parse_json_density(self, j, json_fname, ensemble):
210 """Add information from cluster density JSON file"""
211 if j.get(
'producer')
and j[
'producer'].get(
'name') ==
'IMP.sampcon':
212 self._parse_sampcon(j, json_fname, ensemble)
214 def _parse_sampcon(self, j, json_fname, ensemble):
215 """Add information from IMP.sampcon-generated JSON file"""
216 ranges = j[
'density_custom_ranges']
217 for cluster
in j[
'clusters']:
218 if cluster[
'name'] == ensemble.name:
219 for range_name, mrc
in cluster[
'density'].items():
220 sel_tuple = ranges[range_name]
221 if len(sel_tuple) > 1:
222 raise ValueError(
"Cannot handle sel tuple")
223 asym = self._parse_sel_tuple(sel_tuple[0])
227 density = ihm.model.LocalizationDensity(
228 file=ihm.location.OutputFileLocation(
229 path=full_mrc, details=
'Localization density'),
231 ensemble.densities.append(density)
233 def _parse_sel_tuple(self, t):
234 """Convert a PMI-style selection tuple into an IHM object"""
236 for asym
in self.system.asym_units:
237 asym_map[asym.details] = asym
238 if isinstance(t, _string_type):
240 elif isinstance(t, (list, tuple))
and len(t) == 3:
241 return asym_map[t[2]](t[0], t[1])
243 raise TypeError(
"Cannot handle selection tuple: %s" % t)
245 def _get_states(self, hiers, states):
246 def get_state_by_name(name):
247 if states
is not None and name
not in states:
249 if name
not in self._state_by_name:
250 self._add_state(ihm.model.State(name=name))
251 return self._state_by_name[name]
254 state_hiers = IMP.atom.get_by_type(h, IMP.atom.STATE_TYPE)
255 for state_hier
in state_hiers:
256 state_obj = get_state_by_name(state_hier.get_name())
257 if state_obj
is not None:
258 yield state_obj, state_hier, h
260 if len(state_hiers) == 0:
261 state_obj = get_state_by_name(
None)
262 if state_obj
is not None:
263 yield state_obj, h,
None
265 def _add_state(self, state):
266 if not self.system.state_groups:
267 self.system.state_groups.append(ihm.model.StateGroup())
268 self.system.state_groups[-1].append(state)
269 self._state_by_name[state.name] = state
271 def _add_chain(self, chain, seq_from_res):
272 entity, offset = self._entities.add(chain, seq_from_res)
273 component = self._components.add(chain, entity, offset)
276 def add_rmf(self, filename, name=None, frame=0, states=None,
278 """Add information to the system from a single frame in an RMF file."""
280 rh = RMF.open_rmf_file_read_only(filename)
285 return self.
add_model(hiers, restraints, name=name, states=states,
289 """Write out the IHM system to a named mmCIF or BinaryCIF file."""
290 if filename.endswith(
'.bcif'):
291 with open(filename,
'wb')
as fh:
292 ihm.dumper.write(fh, [self.system], format=
'BCIF')
294 with open(filename,
'w')
as fh:
295 ihm.dumper.write(fh, [self.system])
298 """Use python-ihm to print a summary report of the IHM system."""
299 self.system.report(fh)
def add_rmf
Add information to the system from a single frame in an RMF file.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def report
Use python-ihm to print a summary report of the IHM system.
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
Convert one or more IMP Models and/or RMF frames to mmCIF or BinaryCIF.
Classes to represent data structures used in mmCIF.
Class for storing model, its restraints, constraints, and particles.
def write
Write out the IHM system to a named mmCIF or BinaryCIF file.
def add_model
Add information to the system from an IMP Model.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
Track creation of a system fragment from clustering.
Store info for a chain of a protein.
Functionality for loading, creating, manipulating and scoring atomic structures.
Support for the RMF file format for storing hierarchical molecular data and markup.