1 """@namespace IMP.mmcif.util
2 @brief Utility functions for IMP.mmcif.
13 import ihm.representation
22 """Map indices to multi-character chain IDs.
23 We label the first 26 chains A-Z, then we move to two-letter
24 chain IDs: AA through AZ, then BA through BZ, through to ZZ.
25 This continues with longer chain IDs."""
26 def __getitem__(self, ind):
27 chars = string.ascii_uppercase
31 ids.append(chars[ind % lc])
33 ids.append(chars[ind])
34 return "".join(reversed(ids))
38 """Convert one or more IMP Models and/or RMF frames to mmCIF
41 Models can be added by calling `add_model`. The final output
42 is collected in a python-ihm `ihm.System` object, as the `system`
43 attribute, and can be written out using the python-ihm API or
44 with the convenience `write` method.
46 This class is still in development and does not yet convert all
47 IMP data to IHM mmCIF/BinaryCIF.
51 self.system = ihm.System()
52 self._state_by_name = {}
53 self._entities = IMP.mmcif.data._EntityMapper(self.system)
54 self._components = IMP.mmcif.data._ComponentMapper(self.system)
55 self._software = IMP.mmcif.data._AllSoftware(self.system)
56 self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
57 self._datasets = IMP.mmcif.data._Datasets(self.system)
58 self._model_assemblies = IMP.mmcif.data._ModelAssemblies(self.system)
59 self._representations = IMP.mmcif.data._Representations(self.system)
60 self._protocols = IMP.mmcif.data._Protocols(self.system)
61 self._restraints = IMP.mmcif.restraint._AllRestraints(
62 self.system, self._components)
64 def add_model(self, hiers, restraints, name=None, states=None,
66 """Add information to the system from an IMP Model.
67 Multiple IHM Models (coordinate sets) may be created (one per
68 state per hierarchy). All IHM models in a state are given the
69 same name and are added to the given IHM Ensemble for that state
70 (or placed in a new Ensemble if not given). Only coordinates
71 for the given state names (or all states, if not given) are added.
72 The IHM Ensembles containing the new models are returned."""
75 if self.system.title
is None and len(hiers) > 0:
76 self.system.title = hiers[0].get_name()
78 for state_obj, state_hier, top_hier
in self._get_states(hiers, states):
79 e, model = self._add_hierarchy(
80 state_hier, top_hier, state_obj, name,
81 ensembles.get(state_obj.name))
82 ihm_models.append(model)
83 ensembles[state_obj.name] = e
84 self._add_restraints(restraints, ihm_models)
87 def _add_restraints(self, restraints, ihm_models):
90 all_rsr.extend(self._handle_restraint(r, ihm_models))
93 dsg = self._datasets.add_group(
94 [r.dataset
for r
in all_rsr
if r.dataset
is not None],
95 "All datasets used in modeling")
99 for step
in m.protocol.steps:
100 step.dataset_group = dsg
101 for analysis
in m.protocol.analyses:
102 for step
in analysis.steps:
103 step.dataset_group = dsg
105 def _handle_restraint(self, r, ihm_models):
107 for cif_r
in self._restraints.handle(r, ihm_models):
112 rs = IMP.RestraintSet.get_from(r)
116 for child
in rs.restraints:
117 for r
in self._handle_restraint(child, ihm_models):
120 def _ensure_unique_molecule_names(self, chains):
121 """Make sure that the input IMP molecule names are unique.
122 We assume that a given molecule name maps to an IHM asym,
123 so we need these names to be unique. Rename molecules if
125 def _get_unique_name(old_name, seen_names):
126 for copy_num
in itertools.count(1):
127 name = old_name +
" copy %d" % copy_num
128 if name
not in seen_names:
134 if mol
and mol.get_name():
135 name = mol.get_name()
136 if name
in seen_names:
138 name = _get_unique_name(old_name, seen_names)
140 print(
"Duplicate molecule name detected - reassigning "
141 "from %r to %r" % (old_name, name))
144 def _ensure_unique_chain_ids(self, chains):
145 """Make sure that IMP chain IDs are unique, since they are mapped to
146 IHM asym IDs, which need to be unique. Reassign them alphabetically
147 if duplicates are found."""
148 chain_ids = [c.get_id()
for c
in chains]
149 if len(set(chain_ids)) < len(chain_ids):
150 for chain, cid
in zip(chains, _ChainIDs()):
152 print(
"Duplicate chain IDs detected - reassigning from %s to %s"
153 % (chain_ids, [c.get_id()
for c
in chains]))
155 def _add_hierarchy(self, h, top_h, state, name, ensemble):
157 mg = ihm.model.ModelGroup()
159 ensemble = ihm.model.Ensemble(model_group=mg, num_models=0)
160 self.system.ensembles.append(ensemble)
162 for c
in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
163 self._ensure_unique_molecule_names(chains)
164 self._ensure_unique_chain_ids(chains)
166 raise ValueError(
"No chains found in %s" % h)
168 ch = IMP.mmcif.data._CoordinateHandler(self.system, self._datasets)
170 ps = ch.get_structure_particles(chain)
171 comp = self._add_chain(chain, ch.get_residue_sequence(ps))
172 asyms.append(comp.asym_unit)
173 ch.add_chain(ps, comp.asym_unit)
174 representation = self._representations.add(ch._representation)
175 assembly = self._model_assemblies.add(asyms)
176 self._add_hierarchy_ensemble_info(h, top_h, ensemble)
177 self._software.add_hierarchy(h, top_h)
178 protocol = self._protocols._add_hierarchy(h, top_h, assembly,
180 self._external_files.add_hierarchy(h, top_h)
181 model = ihm.model.Model(assembly=assembly, protocol=protocol,
182 representation=representation, name=name)
183 model._atoms = ch._atoms
184 model._spheres = ch._spheres
185 ensemble.model_group.append(model)
186 ensemble.num_models += 1
187 return ensemble, model
189 def _add_hierarchy_ensemble_info(self, h, top_h, ensemble):
190 """Add ensemble-specific information from the given hierarchy"""
191 for prov
in reversed(list(_get_all_state_provenance(
193 ensemble.num_models = prov.get_number_of_members()
194 ensemble.precision = prov.get_precision()
195 ensemble.name = prov.get_name()
196 d = prov.get_density()
197 if d
and d.endswith(
'.json'):
200 self._parse_json_density(j, d, ensemble)
202 def _parse_json_density(self, j, json_fname, ensemble):
203 """Add information from cluster density JSON file"""
204 if j.get(
'producer')
and j[
'producer'].get(
'name') ==
'IMP.sampcon':
205 self._parse_sampcon(j, json_fname, ensemble)
207 def _parse_sampcon(self, j, json_fname, ensemble):
208 """Add information from IMP.sampcon-generated JSON file"""
209 ranges = j[
'density_custom_ranges']
210 for cluster
in j[
'clusters']:
211 if cluster[
'name'] == ensemble.name:
212 for range_name, mrc
in cluster[
'density'].items():
213 sel_tuple = ranges[range_name]
214 if len(sel_tuple) > 1:
215 raise ValueError(
"Cannot handle sel tuple")
216 asym = self._parse_sel_tuple(sel_tuple[0])
220 density = ihm.model.LocalizationDensity(
221 file=ihm.location.OutputFileLocation(
222 path=full_mrc, details=
'Localization density'),
224 ensemble.densities.append(density)
226 def _parse_sel_tuple(self, t):
227 """Convert a PMI-style selection tuple into an IHM object"""
229 for asym
in self.system.asym_units:
230 asym_map[asym.details] = asym
231 if isinstance(t, str):
233 elif isinstance(t, (list, tuple))
and len(t) == 3:
234 return asym_map[t[2]](t[0], t[1])
236 raise TypeError(
"Cannot handle selection tuple: %s" % t)
238 def _get_states(self, hiers, states):
239 def get_state_by_name(name):
240 if states
is not None and name
not in states:
242 if name
not in self._state_by_name:
243 self._add_state(ihm.model.State(name=name))
244 return self._state_by_name[name]
247 state_hiers = IMP.atom.get_by_type(h, IMP.atom.STATE_TYPE)
248 for state_hier
in state_hiers:
249 state_obj = get_state_by_name(state_hier.get_name())
250 if state_obj
is not None:
251 yield state_obj, state_hier, h
253 if len(state_hiers) == 0:
254 state_obj = get_state_by_name(
None)
255 if state_obj
is not None:
256 yield state_obj, h,
None
258 def _add_state(self, state):
259 if not self.system.state_groups:
260 self.system.state_groups.append(ihm.model.StateGroup())
261 self.system.state_groups[-1].append(state)
262 self._state_by_name[state.name] = state
264 def _add_chain(self, chain, seq_from_res):
265 entity, offset = self._entities.add(chain, seq_from_res)
266 component = self._components.add(chain, entity, offset)
269 def add_rmf(self, filename, name=None, frame=0, states=None,
271 """Add information to the system from a single frame in an RMF file."""
273 rh = RMF.open_rmf_file_read_only(filename)
278 return self.
add_model(hiers, restraints, name=name, states=states,
282 """Write out the IHM system to a named mmCIF or BinaryCIF file."""
283 if filename.endswith(
'.bcif'):
284 with open(filename,
'wb')
as fh:
285 ihm.dumper.write(fh, [self.system], format=
'BCIF')
287 with open(filename,
'w')
as fh:
288 ihm.dumper.write(fh, [self.system])
291 """Use python-ihm to print a summary report of the IHM system."""
292 self.system.report(fh)
def add_rmf
Add information to the system from a single frame in an RMF file.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def report
Use python-ihm to print a summary report of the IHM system.
def get_molecule
Given a Hierarchy, walk up and find the parent Molecule.
Convert one or more IMP Models and/or RMF frames to mmCIF or BinaryCIF.
Classes to represent data structures used in mmCIF.
Class for storing model, its restraints, constraints, and particles.
def write
Write out the IHM system to a named mmCIF or BinaryCIF file.
def add_model
Add information to the system from an IMP Model.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
Track creation of a system fragment from clustering.
Store info for a chain of a protein.
Functionality for loading, creating, manipulating and scoring atomic structures.
Support for the RMF file format for storing hierarchical molecular data and markup.