IMP logo
IMP Reference Guide  2.19.0
The Integrative Modeling Platform
mmcif/util.py
1 """@namespace IMP.mmcif.util
2  @brief Utility functions for IMP.mmcif.
3 """
4 
5 import ihm.location
6 import ihm.dumper
7 import IMP.mmcif.data
9 import IMP.rmf
10 import IMP.atom
11 import RMF
12 import ihm.format
13 import ihm.representation
14 import string
15 import weakref
16 import operator
17 
18 
19 class _ChainIDs(object):
20  """Map indices to multi-character chain IDs.
21  We label the first 26 chains A-Z, then we move to two-letter
22  chain IDs: AA through AZ, then BA through BZ, through to ZZ.
23  This continues with longer chain IDs."""
24  def __getitem__(self, ind):
25  chars = string.ascii_uppercase
26  lc = len(chars)
27  ids = []
28  while ind >= lc:
29  ids.append(chars[ind % lc])
30  ind = ind // lc - 1
31  ids.append(chars[ind])
32  return "".join(reversed(ids))
33 
34 
35 class RMFFrame(object):
36  """An individual state conformation read from a PDB file"""
37  def __init__(self, filename, frame, name):
38  self.filename, self.frame = filename, frame
39  self.name = name
40 
41  def create(self, model):
42  rmf = RMF.open_rmf_file_read_only(self.filename)
43  # todo: support frame!=0
44  hiers = IMP.rmf.create_hierarchies(rmf, model)
45  restraints = IMP.rmf.create_restraints(rmf, model)
46  return hiers, restraints
47 
48  def link(self, hiers, restraints):
49  rmf = RMF.open_rmf_file_read_only(self.filename)
50  IMP.rmf.link_hierarchies(rmf, hiers)
51  IMP.rmf.link_restraints(rmf, restraints)
52  IMP.rmf.load_frame(rmf, RMF.FrameID(self.frame))
53 
54 
55 class _ModelFrame(object):
56  """An individual state conformation read from an IMP.Model"""
57  def __init__(self, hiers, restraints, name):
58  self.hiers, self.restraints = hiers, restraints
59  self.name = name
60 
61  def create(self, model):
62  return self.hiers, self.restraints
63 
64  def link(self, hiers, restraints):
65  if len(hiers) != len(self.hiers) \
66  or len(restraints) != len(self.restraints):
67  raise ValueError("Frames do not match")
68  hiers[:] = self.hiers
69  # todo: this won't work currently because the Restraint objects
70  # will change
71  restraints[:] = self.restraints
72 
73 
74 class _NonModeledChain(object):
75  """Represent a chain that was experimentally characterized but not modeled.
76  Such a chain resembles an IMP.atom.Chain, but has no associated
77  structure, and belongs to no state."""
78  def __init__(self, name, sequence, chain_type):
79  self.name = name
80  self.sequence = sequence
81  self.chain_type = chain_type
82 
83  def get_sequence(self):
84  return self.sequence
85 
86 
87 class System(object):
88  def __init__(self):
89  self.system = ihm.System()
90  self._states = []
91  self._ensembles = []
92  self._frames = []
93 
94  self.entities = IMP.mmcif.data._EntityMapper(self.system)
95  self.components = IMP.mmcif.data._ComponentMapper(self.system)
96  self._software = IMP.mmcif.data._AllSoftware(self.system)
97  self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
98  self.datasets = IMP.mmcif.data._Datasets(self.system)
99  # All modeling protocols
100  self.protocols = IMP.mmcif.data._Protocols(self.system)
101  self.representation = ihm.representation.Representation()
102  self.system.orphan_representations.append(self.representation)
103 
104  def _update_location(self, fileloc):
105  """Update FileLocation to point to a parent repository, if any"""
106  ihm.location.Repository._update_in_repos(fileloc,
107  self._external_files._repos)
108 
109  def add_repository(self, doi, root=None, url=None, top_directory=None):
110  """Add a repository containing one or more modeling files."""
111  self._external_files.add_repo(ihm.location.Repository(
112  doi, root, url, top_directory))
113 
114  def _add_state(self, state):
115  if not self.system.state_groups:
116  self.system.state_groups.append(ihm.model.StateGroup())
117  self.system.state_groups[-1].append(state)
118  self._states.append(state)
119 
120  def _add_ensemble(self, ensemble):
121  self._ensembles.append(ensemble)
122  self.system.ensembles.append(ensemble)
123 
124  def _add_frame(self, frame):
125  self._frames.append(frame)
126  frame.id = len(self._frames)
127 
128  def _add_hierarchy(self, h, state):
129  if self.system.title is None:
130  self.system.title = h.get_name()
131  chains = [IMP.atom.Chain(c)
132  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
133  if len(chains) == 0:
134  raise ValueError("No chains found in %s" % h)
135  # todo: handle same chain in multiple states
136  for c in chains:
137  component = self._add_chain(c)
138  state._all_modeled_components.append(component)
139  if hasattr(component, 'asym_unit'):
140  state.modeled_assembly.append(component.asym_unit)
141  else:
142  state.modeled_assembly.append(component.entity)
143  state.repsegments[component] = \
144  list(self._get_repsegments(
145  c, component, self._get_all_starting_models(component)))
146  # Number of states that have representation for this component
147  num_state_reps = len([s for s in self._states
148  if component in s.repsegments])
149  # Assume representation for a given component is the same in all
150  # states, so we only need one copy of it in the mmCIF file
151  if num_state_reps == 1:
152  self.representation.extend(state.repsegments[component])
153  self._software.add_hierarchy(h)
154  self.protocols._add_hierarchy(h, state.modeled_assembly,
155  self._software)
156  self._external_files.add_hierarchy(h)
157 
158  def _get_all_starting_models(self, comp):
159  """Get all starting models (in all states) for the given component"""
160  for state in self._states:
161  for seg in state.repsegments.get(comp, []):
162  if seg.starting_model:
163  yield seg.starting_model
164 
165  def _get_repsegments(self, chain, component, existing_starting_models):
166  """Yield groups of particles under chain with same representation"""
167  smf = IMP.mmcif.data._StartingModelFinder(component,
168  existing_starting_models)
169  segfactory = IMP.mmcif.data._RepSegmentFactory(component)
170 
171  for sp in self._get_structure_particles(chain):
172  starting_model = smf.find(sp, self)
173  seg = segfactory.add(sp, starting_model)
174  if seg:
175  yield seg
176  last = segfactory.get_last()
177  if last:
178  yield last
179 
180  def _get_structure_particles(self, chain):
181  """Yield all particles under chain with coordinates.
182  They are sorted by residue index."""
183  # todo: handle Representation decorators for non-PMI-1 models
184  ps = IMP.atom.get_leaves(chain)
185  resind_dict = {}
186  for p in ps:
188  residue = IMP.atom.Residue(p)
189  resind = residue.get_index()
190  if resind in resind_dict:
191  continue
192  resind_dict[resind] = residue
194  fragment = IMP.atom.Fragment(p)
195  # todo: handle non-contiguous fragments
196  resinds = fragment.get_residue_indexes()
197  resind = resinds[len(resinds) // 2]
198  if resind in resind_dict:
199  continue
200  resind_dict[resind] = fragment
201  # Return values sorted by key (residue index)
202  for item in sorted(resind_dict.items(), key=operator.itemgetter(0)):
203  yield item[1]
204 
205  def add_non_modeled_chain(self, name, sequence,
206  chain_type=IMP.atom.UnknownChainType):
207  """Add a chain that wasn't modeled by IMP."""
208  c = _NonModeledChain(name, sequence, chain_type)
209  self._add_chain(c)
210 
211  def _add_chain(self, c):
212  entity = self.entities.add(c)
213  component = self.components.add(c, entity)
214  return component
215 
216  def write(self, fname):
217  with open(fname, 'w') as fh:
218  ihm.dumper.write(fh, [self.system])
219 
220 
221 class State(ihm.model.State):
222  """Represent a single IMP state."""
223  def __init__(self, system):
224  super(State, self).__init__()
225  self.system = weakref.proxy(system)
226  system._add_state(self)
227  self.model = IMP.Model()
228  self.hiers = None
229  self._wrapped_restraints = []
230  # The assembly of all components modeled by IMP in this state.
231  # This may be smaller than the complete assembly.
232  self.modeled_assembly = ihm.Assembly(
233  name="Modeled assembly",
234  description="All components modeled by IMP")
235  system.system.orphan_assemblies.append(self.modeled_assembly)
236  # A list of ihm.representation.Segment objects for each Component
237  self.repsegments = {}
238  self._frames = []
239 
240  self._all_modeled_components = []
241 
242  def _add_frame(self, f, model):
243  self._frames.append(f)
244  self.system._add_frame(f)
245  if self._load_frame(f):
246  for h in self.hiers:
247  self._add_hierarchy(h)
248  self._add_restraints(self.restraints, model)
249  else:
250  self._update_restraints(model)
251 
252  def _load_frame(self, f):
253  """Load hierarchies and restraints from a frame.
254  Return True if this results in making new hierarchies."""
255  if self.hiers is None:
256  self.hiers, self.restraints = f.create(self.model)
257  self._remove_duplicate_chain_ids(True)
258  return True
259  else:
260  f.link(self.hiers, self.restraints)
261  self._remove_duplicate_chain_ids(False)
262  return False
263 
264  def _remove_duplicate_chain_ids(self, new_hiers):
265  chains = []
266  for h in self.hiers:
267  chains.extend(
268  IMP.atom.Chain(c)
269  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE))
270  if new_hiers:
271  self._assigned_chain_ids = []
272  chain_ids = [c.get_id() for c in chains]
273  if len(set(chain_ids)) < len(chain_ids):
274  print("Duplicate chain IDs detected - reassigning "
275  "alphabetically")
276  for chain, cid in zip(chains, _ChainIDs()):
277  self._assigned_chain_ids.append(cid)
278  chain.set_id(cid)
279  else:
280  for chain, cid in zip(chains, self._assigned_chain_ids):
281  chain.set_id(cid)
282 
283  def _add_hierarchy(self, h):
284  self.system._add_hierarchy(h, self)
285 
286  def _add_restraints(self, rs, model):
287  mapper = IMP.mmcif.restraint._RestraintMapper(self.system)
288  for r in rs:
289  self._handle_restraint(mapper, r, model)
290 
291  def _handle_restraint(self, mapper, r, model):
292  rw = mapper.handle(r, model, self.modeled_assembly)
293  if rw:
294  self._wrapped_restraints.append(rw)
295  self.system.system.restraints.append(rw)
296  else:
297  try:
298  rs = IMP.RestraintSet.get_from(r)
299  except ValueError:
300  rs = None
301  if rs:
302  for child in rs.restraints:
303  self._handle_restraint(mapper, child, model)
304 
305  def _update_restraints(self, model):
306  for rw in self._wrapped_restraints:
307  rw.add_model_fit(model)
308 
309 
310 class Ensemble(ihm.model.Ensemble):
311  """Represent a set of similar models in a state."""
312  def __init__(self, state, name):
313  self.state = weakref.proxy(state)
314  state.system._add_ensemble(self)
315  self._frames = []
316  mg = ihm.model.ModelGroup(name=name)
317  state.append(mg)
318  super(Ensemble, self).__init__(model_group=mg, num_models=0, name=name)
319 
320  def add_frame(self, frame):
321  """Add a frame from a custom source"""
322  self._frames.append(frame)
323  self.num_models += 1
324  model = IMP.mmcif.data._Model(frame, self.state)
325  self.model_group.append(model)
326  self.state._add_frame(frame, model)
327 
328  def add_rmf(self, fname, name, frame=0):
329  """Add a frame from an RMF file"""
330  self.add_frame(RMFFrame(fname, frame, name))
331 
332  def add_model(self, hiers, restraints, name):
333  """Add hierarchies and restraints from an IMP.Model"""
334  self.add_frame(_ModelFrame(hiers, restraints, name))
Represent a set of similar models in a state.
Definition: mmcif/util.py:310
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Represent a single IMP state.
Definition: mmcif/util.py:221
def add_rmf
Add a frame from an RMF file.
Definition: mmcif/util.py:328
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
An individual state conformation read from a PDB file.
Definition: mmcif/util.py:35
def add_frame
Add a frame from a custom source.
Definition: mmcif/util.py:320
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
A decorator for a residue.
Definition: Residue.h:137
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def add_model
Add hierarchies and restraints from an IMP.Model.
Definition: mmcif/util.py:332
Store info for a chain of a protein.
Definition: Chain.h:61
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.