IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
mmcif/util.py
1 """@namespace IMP.mmcif.util
2  @brief Utility functions for IMP.mmcif.
3 """
4 
5 import ihm.location
6 import ihm.dumper
7 import IMP.mmcif.data
9 import IMP.rmf
10 import IMP.atom
11 import RMF
12 import ihm.format
13 import ihm.representation
14 import string
15 import weakref
16 import operator
17 
18 class _ChainIDs(object):
19  """Map indices to multi-character chain IDs.
20  We label the first 26 chains A-Z, then we move to two-letter
21  chain IDs: AA through AZ, then BA through BZ, through to ZZ.
22  This continues with longer chain IDs."""
23  def __getitem__(self, ind):
24  chars = string.ascii_uppercase
25  lc = len(chars)
26  ids = []
27  while ind >= lc:
28  ids.append(chars[ind % lc])
29  ind = ind // lc - 1
30  ids.append(chars[ind])
31  return "".join(reversed(ids))
32 
33 
34 class RMFFrame(object):
35  """An individual state conformation read from a PDB file"""
36  def __init__(self, filename, frame, name):
37  self.filename, self.frame = filename, frame
38  self.name = name
39 
40  def create(self, model):
41  rmf = RMF.open_rmf_file_read_only(self.filename)
42  # todo: support frame!=0
43  hiers = IMP.rmf.create_hierarchies(rmf, model)
44  restraints = IMP.rmf.create_restraints(rmf, model)
45  return hiers, restraints
46 
47  def link(self, hiers, restraints):
48  rmf = RMF.open_rmf_file_read_only(self.filename)
49  IMP.rmf.link_hierarchies(rmf, hiers)
50  IMP.rmf.link_restraints(rmf, restraints)
51  IMP.rmf.load_frame(rmf, RMF.FrameID(self.frame))
52 
53 
54 class _ModelFrame(object):
55  """An individual state conformation read from an IMP.Model"""
56  def __init__(self, hiers, restraints, name):
57  self.hiers, self.restraints = hiers, restraints
58  self.name = name
59 
60  def create(self, model):
61  return self.hiers, self.restraints
62 
63  def link(self, hiers, restraints):
64  if len(hiers) != len(self.hiers) \
65  or len(restraints) != len(self.restraints):
66  raise ValueError("Frames do not match")
67  hiers[:] = self.hiers
68  # todo: this won't work currently because the Restraint objects
69  # will change
70  restraints[:] = self.restraints
71 
72 
73 class _NonModeledChain(object):
74  """Represent a chain that was experimentally characterized but not modeled.
75  Such a chain resembles an IMP.atom.Chain, but has no associated
76  structure, and belongs to no state."""
77  def __init__(self, name, sequence, chain_type):
78  self.name = name
79  self.sequence = sequence
80  self.chain_type = chain_type
81 
82  get_sequence = lambda self: self.sequence
83 
84 
85 class System(object):
86  def __init__(self):
87  self.system = ihm.System()
88  self._states = []
89  self._ensembles = []
90  self._frames = []
91 
92  self.entities = IMP.mmcif.data._EntityMapper(self.system)
93  self.components = IMP.mmcif.data._ComponentMapper(self.system)
94  self._software = IMP.mmcif.data._AllSoftware(self.system)
95  self._external_files = IMP.mmcif.data._ExternalFiles(self.system)
96  self.datasets = IMP.mmcif.data._Datasets(self.system)
97  # All modeling protocols
98  self.protocols = IMP.mmcif.data._Protocols(self.system)
99  self.representation = ihm.representation.Representation()
100  self.system.orphan_representations.append(self.representation)
101 
102  def _update_location(self, fileloc):
103  """Update FileLocation to point to a parent repository, if any"""
104  ihm.location.Repository._update_in_repos(fileloc,
105  self._external_files._repos)
106 
107  def add_repository(self, doi, root=None, url=None, top_directory=None):
108  """Add a repository containing one or more modeling files."""
109  self._external_files.add_repo(ihm.location.Repository(
110  doi, root, url, top_directory))
111 
112  def _add_state(self, state):
113  if not self.system.state_groups:
114  self.system.state_groups.append(ihm.model.StateGroup())
115  self.system.state_groups[-1].append(state)
116  self._states.append(state)
117 
118  def _add_ensemble(self, ensemble):
119  self._ensembles.append(ensemble)
120  self.system.ensembles.append(ensemble)
121 
122  def _add_frame(self, frame):
123  self._frames.append(frame)
124  frame.id = len(self._frames)
125 
126  def _add_hierarchy(self, h, state):
127  chains = [IMP.atom.Chain(c)
128  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE)]
129  if len(chains) == 0:
130  raise ValueError("No chains found in %s" % h)
131  # todo: handle same chain in multiple states
132  for c in chains:
133  component = self._add_chain(c)
134  state._all_modeled_components.append(component)
135  if hasattr(component, 'asym_unit'):
136  state.modeled_assembly.append(component.asym_unit)
137  else:
138  state.modeled_assembly.append(component.entity)
139  state.repsegments[component] \
140  = list(self._get_repsegments(c, component,
141  self._get_all_starting_models(component)))
142  # Number of states that have representation for this component
143  num_state_reps = len([s for s in self._states
144  if component in s.repsegments])
145  # Assume representation for a given component is the same in all
146  # states, so we only need one copy of it in the mmCIF file
147  if num_state_reps == 1:
148  self.representation.extend(state.repsegments[component])
149  self.protocols._add_hierarchy(h, state.modeled_assembly)
150  self._external_files.add_hierarchy(h)
151  self._software.add_hierarchy(h)
152 
153  def _get_all_starting_models(self, comp):
154  """Get all starting models (in all states) for the given component"""
155  for state in self._states:
156  for seg in state.repsegments.get(comp, []):
157  if seg.starting_model:
158  yield seg.starting_model
159 
160  def _get_repsegments(self, chain, component, existing_starting_models):
161  """Yield groups of particles under chain with same representation"""
162  smf = IMP.mmcif.data._StartingModelFinder(component,
163  existing_starting_models)
164  segfactory = IMP.mmcif.data._RepSegmentFactory(component)
165 
166  for sp in self._get_structure_particles(chain):
167  starting_model = smf.find(sp, self)
168  seg = segfactory.add(sp, starting_model)
169  if seg:
170  yield seg
171  last = segfactory.get_last()
172  if last:
173  yield last
174 
175  def _get_structure_particles(self, chain):
176  """Yield all particles under chain with coordinates.
177  They are sorted by residue index."""
178  # todo: handle Representation decorators for non-PMI-1 models
179  ps = IMP.atom.get_leaves(chain)
180  resind_dict = {}
181  for p in ps:
183  residue = IMP.atom.Residue(p)
184  resind = residue.get_index()
185  if resind in resind_dict:
186  continue
187  resind_dict[resind] = residue
189  fragment = IMP.atom.Fragment(p)
190  # todo: handle non-contiguous fragments
191  resinds = fragment.get_residue_indexes()
192  resind = resinds[len(resinds) // 2]
193  if resind in resind_dict:
194  continue
195  resind_dict[resind] = fragment
196  # Return values sorted by key (residue index)
197  for item in sorted(resind_dict.items(), key=operator.itemgetter(0)):
198  yield item[1]
199 
200  def add_non_modeled_chain(self, name, sequence,
201  chain_type=IMP.atom.UnknownChainType):
202  """Add a chain that wasn't modeled by IMP."""
203  c = _NonModeledChain(name, sequence, chain_type)
204  self._add_chain(c)
205 
206  def _add_chain(self, c):
207  entity = self.entities.add(c)
208  component = self.components.add(c, entity)
209  return component
210 
211  def write(self, fname):
212  with open(fname, 'w') as fh:
213  ihm.dumper.write(fh, [self.system])
214 
215 
216 class State(ihm.model.State):
217  """Represent a single IMP state."""
218  def __init__(self, system):
219  super(State, self).__init__()
220  self.system = weakref.proxy(system)
221  system._add_state(self)
222  self.model = IMP.Model()
223  self.hiers = None
224  self._wrapped_restraints = []
225  # The assembly of all components modeled by IMP in this state.
226  # This may be smaller than the complete assembly.
227  self.modeled_assembly = ihm.Assembly(name="Modeled assembly",
228  description="All components modeled by IMP")
229  system.system.orphan_assemblies.append(self.modeled_assembly)
230  # A list of ihm.representation.Segment objects for each Component
231  self.repsegments = {}
232  self._frames = []
233 
234  self._all_modeled_components = []
235 
236  def _add_frame(self, f, model):
237  self._frames.append(f)
238  self.system._add_frame(f)
239  if self._load_frame(f):
240  for h in self.hiers:
241  self._add_hierarchy(h)
242  self._add_restraints(self.restraints, model)
243  else:
244  self._update_restraints(model)
245 
246  def _load_frame(self, f):
247  """Load hierarchies and restraints from a frame.
248  Return True if this results in making new hierarchies."""
249  if self.hiers is None:
250  self.hiers, self.restraints = f.create(self.model)
251  self._remove_duplicate_chain_ids(True)
252  return True
253  else:
254  f.link(self.hiers, self.restraints)
255  self._remove_duplicate_chain_ids(False)
256  return False
257 
258  def _remove_duplicate_chain_ids(self, new_hiers):
259  chains = []
260  for h in self.hiers:
261  chains.extend(IMP.atom.Chain(c)
262  for c in IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE))
263  if new_hiers:
264  self._assigned_chain_ids = []
265  chain_ids = [c.get_id() for c in chains]
266  if len(set(chain_ids)) < len(chain_ids):
267  print("Duplicate chain IDs detected - reassigning "
268  "alphabetically")
269  for chain, cid in zip(chains, _ChainIDs()):
270  self._assigned_chain_ids.append(cid)
271  chain.set_id(cid)
272  else:
273  for chain, cid in zip(chains, self._assigned_chain_ids):
274  chain.set_id(cid)
275 
276  def _add_hierarchy(self, h):
277  self.system._add_hierarchy(h, self)
278 
279  def _add_restraints(self, rs, model):
280  m = IMP.mmcif.restraint._RestraintMapper(self.system)
281  for r in rs:
282  rw = m.handle(r, model, self.modeled_assembly)
283  if rw:
284  self._wrapped_restraints.append(rw)
285  self.system.system.restraints.append(rw)
286 
287  def _update_restraints(self, model):
288  for rw in self._wrapped_restraints:
289  rw.add_model_fit(model)
290 
291 
292 class Ensemble(ihm.model.Ensemble):
293  """Represent a set of similar models in a state."""
294  def __init__(self, state, name):
295  self.state = weakref.proxy(state)
296  state.system._add_ensemble(self)
297  self._frames = []
298  mg = ihm.model.ModelGroup(name=name)
299  state.append(mg)
300  super(Ensemble, self).__init__(model_group=mg, num_models=0, name=name)
301 
302  def add_frame(self, frame):
303  """Add a frame from a custom source"""
304  self._frames.append(frame)
305  self.num_models += 1
306  model = IMP.mmcif.data._Model(frame, self.state)
307  self.model_group.append(model)
308  self.state._add_frame(frame, model)
309 
310  def add_rmf(self, fname, name, frame=0):
311  """Add a frame from an RMF file"""
312  self.add_frame(RMFFrame(fname, frame, name))
313 
314  def add_model(self, hiers, restraints, name):
315  """Add hierarchies and restraints from an IMP.Model"""
316  self.add_frame(_ModelFrame(hiers, restraints, name))
Represent a set of similar models in a state.
Definition: mmcif/util.py:292
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:156
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Represent a single IMP state.
Definition: mmcif/util.py:216
def add_rmf
Add a frame from an RMF file.
Definition: mmcif/util.py:310
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
Classes to represent data structures used in mmCIF.
Definition: data.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
An individual state conformation read from a PDB file.
Definition: mmcif/util.py:34
def add_frame
Add a frame from a custom source.
Definition: mmcif/util.py:302
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
Map IMP restraints to mmCIF categories.
A decorator for a residue.
Definition: Residue.h:135
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def add_model
Add hierarchies and restraints from an IMP.Model.
Definition: mmcif/util.py:314
Store info for a chain of a protein.
Definition: Chain.h:61
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Support for the RMF file format for storing hierarchical molecular data and markup.