IMP logo
IMP Reference Guide  2.16.0
The Integrative Modeling Platform
mmcif/util.py
1 """@namespace IMP.mmcif.util
2  @brief Utility functions for IMP.mmcif.
3 """
4 
import operator
import string
import weakref

import ihm.location
import ihm.dumper
import ihm.format
import ihm.representation
import RMF

import IMP.mmcif.data
import IMP.mmcif.restraint
import IMP.rmf
import IMP.atom
17 
18 
19 class _ChainIDs(object):
20  """Map indices to multi-character chain IDs.
21  We label the first 26 chains A-Z, then we move to two-letter
22  chain IDs: AA through AZ, then BA through BZ, through to ZZ.
23  This continues with longer chain IDs."""
24  def __getitem__(self, ind):
25  chars = string.ascii_uppercase
26  lc = len(chars)
27  ids = []
28  while ind >= lc:
29  ids.append(chars[ind % lc])
30  ind = ind // lc - 1
31  ids.append(chars[ind])
32  return "".join(reversed(ids))
33 
34 
class RMFFrame(object):
    """An individual state conformation read from a frame of an RMF file"""

    def __init__(self, filename, frame, name):
        """Remember the RMF file name, the frame index and a frame name."""
        self.filename = filename
        self.frame = frame
        self.name = name

    def create(self, model):
        """Create new hierarchies and restraints in `model` from the file.

        Returns a (hierarchies, restraints) pair."""
        # todo: support frame!=0
        fh = RMF.open_rmf_file_read_only(self.filename)
        return (IMP.rmf.create_hierarchies(fh, model),
                IMP.rmf.create_restraints(fh, model))

    def link(self, hiers, restraints):
        """Link existing hierarchies and restraints to the file, then
           load this object's frame into them."""
        fh = RMF.open_rmf_file_read_only(self.filename)
        IMP.rmf.link_hierarchies(fh, hiers)
        IMP.rmf.link_restraints(fh, restraints)
        frame_id = RMF.FrameID(self.frame)
        IMP.rmf.load_frame(fh, frame_id)
53 
54 
55 class _ModelFrame(object):
56  """An individual state conformation read from an IMP.Model"""
57  def __init__(self, hiers, restraints, name):
58  self.hiers, self.restraints = hiers, restraints
59  self.name = name
60 
61  def create(self, model):
62  return self.hiers, self.restraints
63 
64  def link(self, hiers, restraints):
65  if len(hiers) != len(self.hiers) \
66  or len(restraints) != len(self.restraints):
67  raise ValueError("Frames do not match")
68  hiers[:] = self.hiers
69  # todo: this won't work currently because the Restraint objects
70  # will change
71  restraints[:] = self.restraints
72 
73 
74 class _NonModeledChain(object):
75  """Represent a chain that was experimentally characterized but not modeled.
76  Such a chain resembles an IMP.atom.Chain, but has no associated
77  structure, and belongs to no state."""
78  def __init__(self, name, sequence, chain_type):
79  self.name = name
80  self.sequence = sequence
81  self.chain_type = chain_type
82 
83  def get_sequence(self):
84  return self.sequence
85 
86 
class System(object):
    """Gather IMP modeling information into a single ihm.System.

    States, ensembles and frames register themselves here; chains are
    mapped to entities and components, and the accumulated ihm.System
    can be written out with write()."""

    def __init__(self):
        self.system = ihm.System()
        self._states = []
        self._ensembles = []
        self._frames = []

        self.entities = IMP.mmcif.data._EntityMapper(self.system)
        self.components = IMP.mmcif.data._ComponentMapper(self.system)
        self._software = IMP.mmcif.data._AllSoftware(self.system)
        self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
        self.datasets = IMP.mmcif.data._Datasets(self.system)
        # All modeling protocols
        self.protocols = IMP.mmcif.data._Protocols(self.system)
        # One representation object, shared by every state
        self.representation = ihm.representation.Representation()
        self.system.orphan_representations.append(self.representation)

    def _update_location(self, fileloc):
        """Update FileLocation to point to a parent repository, if any"""
        ihm.location.Repository._update_in_repos(fileloc,
                                                 self._external_files._repos)

    def add_repository(self, doi, root=None, url=None, top_directory=None):
        """Add a repository containing one or more modeling files."""
        self._external_files.add_repo(ihm.location.Repository(
            doi, root, url, top_directory))

    def _add_state(self, state):
        """Register a state, placing it in the most recent state group
           (a first group is created on demand)."""
        if not self.system.state_groups:
            self.system.state_groups.append(ihm.model.StateGroup())
        self.system.state_groups[-1].append(state)
        self._states.append(state)

    def _add_ensemble(self, ensemble):
        """Register an ensemble both locally and in the ihm.System."""
        self._ensembles.append(ensemble)
        self.system.ensembles.append(ensemble)

    def _add_frame(self, frame):
        """Register a frame and give it a 1-based `id`."""
        self._frames.append(frame)
        frame.id = len(self._frames)

    def _add_hierarchy(self, h, state):
        """Add every chain found under hierarchy `h` to `state`.

           Raises ValueError if `h` contains no chains."""
        chains = [IMP.atom.Chain(c)
                  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
        if not chains:
            raise ValueError("No chains found in %s" % h)
        # todo: handle same chain in multiple states
        for c in chains:
            component = self._add_chain(c)
            state._all_modeled_components.append(component)
            if hasattr(component, 'asym_unit'):
                state.modeled_assembly.append(component.asym_unit)
            else:
                state.modeled_assembly.append(component.entity)
            state.repsegments[component] = \
                list(self._get_repsegments(
                    c, component, self._get_all_starting_models(component)))
            # Number of states that have representation for this component
            num_state_reps = sum(1 for s in self._states
                                 if component in s.repsegments)
            # Assume representation for a given component is the same in all
            # states, so we only need one copy of it in the mmCIF file
            if num_state_reps == 1:
                self.representation.extend(state.repsegments[component])
        self.protocols._add_hierarchy(h, state.modeled_assembly)
        self._external_files.add_hierarchy(h)
        self._software.add_hierarchy(h)

    def _get_all_starting_models(self, comp):
        """Get all starting models (in all states) for the given component"""
        for state in self._states:
            for seg in state.repsegments.get(comp, []):
                if seg.starting_model:
                    yield seg.starting_model

    def _get_repsegments(self, chain, component, existing_starting_models):
        """Yield groups of particles under chain with same representation"""
        smf = IMP.mmcif.data._StartingModelFinder(component,
                                                  existing_starting_models)
        segfactory = IMP.mmcif.data._RepSegmentFactory(component)

        for sp in self._get_structure_particles(chain):
            starting_model = smf.find(sp, self)
            seg = segfactory.add(sp, starting_model)
            if seg:
                yield seg
        # Flush whatever segment the factory is still holding
        last = segfactory.get_last()
        if last:
            yield last

    def _get_structure_particles(self, chain):
        """Yield all particles under chain with coordinates.
           They are sorted by residue index."""
        # todo: handle Representation decorators for non-PMI-1 models
        ps = IMP.atom.get_leaves(chain)
        resind_dict = {}
        for p in ps:
            # Only decorate a leaf as what it has actually been set up
            # as; leaves that are neither Residue nor Fragment are skipped.
            if IMP.atom.Residue.get_is_setup(p):
                residue = IMP.atom.Residue(p)
                resind = residue.get_index()
                # The first particle seen for a residue index wins
                if resind not in resind_dict:
                    resind_dict[resind] = residue
            elif IMP.atom.Fragment.get_is_setup(p):
                fragment = IMP.atom.Fragment(p)
                # todo: handle non-contiguous fragments
                resinds = fragment.get_residue_indexes()
                # A fragment is keyed on its middle residue index
                resind = resinds[len(resinds) // 2]
                if resind not in resind_dict:
                    resind_dict[resind] = fragment
        # Return values sorted by key (residue index)
        for item in sorted(resind_dict.items(), key=operator.itemgetter(0)):
            yield item[1]

    def add_non_modeled_chain(self, name, sequence,
                              chain_type=IMP.atom.UnknownChainType):
        """Add a chain that wasn't modeled by IMP."""
        c = _NonModeledChain(name, sequence, chain_type)
        self._add_chain(c)

    def _add_chain(self, c):
        """Map chain `c` to an entity and a component; return the
           component."""
        entity = self.entities.add(c)
        component = self.components.add(c, entity)
        return component

    def write(self, fname):
        """Write the accumulated ihm.System to the file `fname`."""
        with open(fname, 'w') as fh:
            ihm.dumper.write(fh, [self.system])
216 
217 
class State(ihm.model.State):
    """Represent a single IMP state."""

    def __init__(self, system):
        super(State, self).__init__()
        self.system = weakref.proxy(system)
        system._add_state(self)
        self.model = IMP.Model()
        self.hiers = None
        self._wrapped_restraints = []
        # The assembly of all components modeled by IMP in this state.
        # This may be smaller than the complete assembly.
        self.modeled_assembly = ihm.Assembly(
            name="Modeled assembly",
            description="All components modeled by IMP")
        system.system.orphan_assemblies.append(self.modeled_assembly)
        # A list of ihm.representation.Segment objects for each Component
        self.repsegments = {}
        self._frames = []

        self._all_modeled_components = []

    def _add_frame(self, f, model):
        """Register frame `f` and load it into this state.

           The first frame populates hierarchies and restraints; later
           frames only add a model fit to the already-wrapped restraints."""
        self._frames.append(f)
        self.system._add_frame(f)
        if not self._load_frame(f):
            self._update_restraints(model)
            return
        for h in self.hiers:
            self._add_hierarchy(h)
        self._add_restraints(self.restraints, model)

    def _load_frame(self, f):
        """Load hierarchies and restraints from a frame.
           Return True if this results in making new hierarchies."""
        made_new = self.hiers is None
        if made_new:
            self.hiers, self.restraints = f.create(self.model)
        else:
            f.link(self.hiers, self.restraints)
        self._remove_duplicate_chain_ids(made_new)
        return made_new

    def _remove_duplicate_chain_ids(self, new_hiers):
        """Make chain IDs unique across this state's hierarchies.

           For new hierarchies, duplicated IDs cause every chain to be
           relabeled alphabetically and the assignment to be remembered;
           otherwise the remembered assignment is reapplied."""
        chains = [IMP.atom.Chain(c)
                  for h in self.hiers
                  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
        if not new_hiers:
            for chain, cid in zip(chains, self._assigned_chain_ids):
                chain.set_id(cid)
            return
        self._assigned_chain_ids = []
        chain_ids = [c.get_id() for c in chains]
        if len(set(chain_ids)) >= len(chain_ids):
            # Every ID is already unique; nothing to reassign
            return
        print("Duplicate chain IDs detected - reassigning "
              "alphabetically")
        for chain, cid in zip(chains, _ChainIDs()):
            self._assigned_chain_ids.append(cid)
            chain.set_id(cid)

    def _add_hierarchy(self, h):
        """Hand hierarchy `h` to the owning System, tagged with this
           state."""
        self.system._add_hierarchy(h, self)

    def _add_restraints(self, rs, model):
        """Wrap each restraint in `rs` that the restraint mapper can
           handle, and record the wrapper here and in the ihm.System."""
        mapper = IMP.mmcif.restraint._RestraintMapper(self.system)
        for r in rs:
            wrapped = mapper.handle(r, model, self.modeled_assembly)
            if not wrapped:
                continue
            self._wrapped_restraints.append(wrapped)
            self.system.system.restraints.append(wrapped)

    def _update_restraints(self, model):
        """Add a fit for `model` to every wrapped restraint."""
        for wrapped in self._wrapped_restraints:
            wrapped.add_model_fit(model)
294 
295 
class Ensemble(ihm.model.Ensemble):
    """Represent a set of similar models in a state."""

    def __init__(self, state, name):
        self.state = weakref.proxy(state)
        state.system._add_ensemble(self)
        self._frames = []
        # Each ensemble owns one model group, which is also appended
        # to the state.
        group = ihm.model.ModelGroup(name=name)
        state.append(group)
        super(Ensemble, self).__init__(
            model_group=group, num_models=0, name=name)

    def add_frame(self, frame):
        """Add a frame from a custom source"""
        self._frames.append(frame)
        self.num_models += 1
        frame_model = IMP.mmcif.data._Model(frame, self.state)
        self.model_group.append(frame_model)
        self.state._add_frame(frame, frame_model)

    def add_rmf(self, fname, name, frame=0):
        """Add a frame from an RMF file"""
        rmf_frame = RMFFrame(fname, frame, name)
        self.add_frame(rmf_frame)

    def add_model(self, hiers, restraints, name):
        """Add hierarchies and restraints from an IMP.Model"""
        model_frame = _ModelFrame(hiers, restraints, name)
        self.add_frame(model_frame)
Represent a set of similar models in a state.
Definition: mmcif/util.py:296
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:158
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Represent a single IMP state.
Definition: mmcif/util.py:218
def add_rmf
Add a frame from an RMF file.
Definition: mmcif/util.py:314
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:73
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
An individual state conformation read from an RMF file.
Definition: mmcif/util.py:35
def add_frame
Add a frame from a custom source.
Definition: mmcif/util.py:306
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
A decorator for a residue.
Definition: Residue.h:137
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def add_model
Add hierarchies and restraints from an IMP.Model.
Definition: mmcif/util.py:318
Store info for a chain of a protein.
Definition: Chain.h:61
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.