IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/22
The Integrative Modeling Platform
xlink.py
1 import typing
2 import json
3 import pathlib
4 
5 import IMP
6 import IMP.core
7 import IMP.atom
9 import IMP.rmf
10 import IMP.bff
11 
12 import numpy as np
13 
14 
class XLinkScore(typing.TypedDict):
    """Result container for cross-link surface-distance scores.

    Attributes
    ----------
    individual : list
        The scores to all potential cross-linking partners. Each score is
        the fraction of the distances of all paths that are shorter than or
        equal to the cross-linker length.

    total : np.ndarray
        The sum of the scores to all potential cross-linking partners, with
        the score defined as for ``individual``.
    """

    individual: list
    total: np.ndarray
33 
34 
class ScoreXlinkSurfaceDistance(IMP.pmi.restraints.RestraintBase):
    """Score cross-links by the path length around obstacles.

    For every cross-link, all pairs of matching attachment obstacles
    (selected by ``res_id`` and ``protein_name``) are scored. A pair scores
    0.0 when its Euclidean distance exceeds the linker length or when the
    volume computed for the first site has too few points; otherwise the
    score is the fraction of volume points from which the second site can be
    reached within ``linker_length + radius``.

    The settings are read from a JSON file that must provide the keys
    ``linker_length``, ``linker_width``, ``radius``,
    ``simulation_grid_spacing``, ``min_points`` and ``xlink_file``.

    Attributes
    ----------
    linker_length, linker_width, radius, simulation_grid_spacing :
        Linker parameters forwarded to ``get_path_length``.
    min_points :
        The minimum number of points in the computed volume. If there are
        fewer points the site is not accessible and the score is zero.
    verbose :
        Print progress information while scoring.
    name_map :
        Forwarded unchanged to ``get_obstacles``; ``None`` by default.
    """

    # NOTE(review): IMP.pmi.restraints, get_path_length, get_obstacles and
    # OBSTACLES_KEYS_FORMATS are not defined or imported in the part of the
    # file visible here - confirm they are in scope at module level.

    model: IMP.Model
    obstacles: np.ndarray
    xlinks: typing.Dict[int, typing.Dict]
    linker_length: float = 20.0
    linker_width: float = 2.0
    radius: float = 1.0
    simulation_grid_spacing: float = 3.5
    min_points: int = 100
    verbose: bool = False
    name_map: typing.Optional[dict] = None

    def __init__(
            self,
            root_hier: IMP.atom.Hierarchy = None,
            verbose: bool = False,
            xlink_settings_file: str = '',
            rmf_file: str = '',
            weight: float = 1.0,
            label: str = 'XLinkSurfaceDistance'
    ):
        """Create the restraint from a JSON settings file.

        :param root_hier: hierarchy whose model is used; when ``None`` a new
            ``IMP.Model`` is created
        :param verbose: print progress information while scoring
        :param xlink_settings_file: path of the JSON settings file
        :param rmf_file: optional RMF file; when it exists the obstacles are
            loaded from it, otherwise the obstacle array stays empty
        :param weight: forwarded to the base class constructor
        :param label: forwarded to the base class constructor
        :raises FileNotFoundError: if the cross-link table named in the
            settings exists neither as given nor relative to the directory
            of the settings file
        """
        base_dir = pathlib.Path(xlink_settings_file).parent
        with open(xlink_settings_file, 'r') as fp:
            xlink_settings = json.load(fp)

        if root_hier is None:
            model = IMP.Model()
        else:
            model = root_hier.get_model()
        super().__init__(
            model,
            weight=weight,
            label=label
        )
        self.model = model

        # Start with an empty structured array so that scoring works (and
        # yields no matches) before any obstacles are loaded.
        keys, formats = list(zip(*OBSTACLES_KEYS_FORMATS))
        self.obstacles = np.zeros(
            0, dtype={'names': keys, 'formats': formats}
        )

        self.xlinks = dict()
        self.linker_length = xlink_settings['linker_length']
        self.linker_width = xlink_settings['linker_width']
        self.radius = xlink_settings['radius']
        self.simulation_grid_spacing = \
            xlink_settings['simulation_grid_spacing']
        self.min_points = xlink_settings['min_points']
        self.verbose = verbose

        # The cross-link table may be given as-is or relative to the
        # directory that contains the settings file.
        xlink_file = xlink_settings['xlink_file']
        if pathlib.Path(xlink_file).is_file():
            xlink_fn = xlink_file
        elif (base_dir / xlink_file).is_file():
            xlink_fn = str(base_dir / xlink_file)
        else:
            raise FileNotFoundError("Could not find XL file %s" % xlink_file)
        self.xlinks = IMP.bff.tools.read_xlink_table(fn=xlink_fn)

        if pathlib.Path(rmf_file).is_file():
            self.obstacles = get_obstacles(
                rmf_file=rmf_file,
                model=self.model,
                name_map=self.name_map
            )

        # Add custom metadata (will be saved in RMF output)
        self.rs.filename = xlink_settings_file

    def compute_scores(self, **kwargs) -> XLinkScore:
        """Score every cross-link against the current obstacles.

        Any of ``linker_length``, ``linker_width``, ``radius``,
        ``simulation_grid_spacing``, ``verbose`` and ``min_points`` may be
        passed as keyword argument to override the instance setting for
        this call only.

        :return: dictionary with ``'individual'`` (one array of pair scores
            per cross-link) and ``'total'`` (array with the sum of the pair
            scores of each cross-link)
        """
        linker_length = kwargs.get('linker_length', self.linker_length)
        linker_width = kwargs.get('linker_width', self.linker_width)
        radius = kwargs.get('radius', self.radius)
        simulation_grid_spacing = kwargs.get(
            'simulation_grid_spacing', self.simulation_grid_spacing)
        verbose = kwargs.get('verbose', self.verbose)
        min_points = kwargs.get('min_points', self.min_points)
        obstacles = self.obstacles

        scores = []
        total_scores = []
        for xlink_key, xlink in self.xlinks.items():
            protein_1 = xlink['protein_1']
            protein_2 = xlink['protein_2']
            residue_1 = xlink['residue_1']
            residue_2 = xlink['residue_2']
            attachment_idx_1 = np.where(
                (obstacles['res_id'] == residue_1)
                & (obstacles['protein_name'] == protein_1)
            )[0]
            attachment_idx_2 = np.where(
                (obstacles['res_id'] == residue_2)
                & (obstacles['protein_name'] == protein_2)
            )[0]

            x_links_scores = []
            for idx_1 in attachment_idx_1:
                origin = obstacles['xyz'][idx_1]
                # The path-length volume is computed from the origin index
                # and the linker settings only, so it is evaluated lazily
                # and at most once per origin instead of once per partner.
                # NOTE(review): assumes get_path_length is deterministic
                # for fixed arguments.
                points = None
                n_points = 0
                for idx_2 in attachment_idx_2:
                    target = obstacles['xyz'][idx_2]
                    if np.linalg.norm(origin - target) > linker_length:
                        # The straight line is already too long; no path
                        # around obstacles can be shorter.
                        if verbose:
                            print("Eucledian distance > linker length")
                        x_links_scores.append(0.0)
                        continue

                    if points is None:
                        av = get_path_length(
                            obstacles=obstacles,
                            idx=idx_1,
                            linker_length=linker_length,
                            linker_width=linker_width,
                            radius=radius,
                            simulation_grid_spacing=simulation_grid_spacing
                        )
                        points = av.points()
                        n_points = len(np.array(points).T)

                    # An empty volume is never accessible; checking it
                    # explicitly avoids a 0/0 below when min_points <= 0.
                    if n_points == 0 or n_points < min_points:
                        if verbose:
                            print(protein_1, residue_1, "is not accessible")
                        x_links_scores.append(0.0)
                        continue

                    if verbose:
                        print("Eucledian distance < linker length.. Testing surface distance")
                    xyz = points[0:3]
                    dist = points[3]
                    # Path length to each volume point plus the straight
                    # remainder from that point to the target.
                    dist_eq = np.linalg.norm(xyz.T - target, axis=1) + dist
                    shorter = np.sum(dist_eq < (linker_length + radius))
                    x_links_scores.append(shorter / len(dist_eq))

            individual_scores = np.array(x_links_scores)
            scores.append(individual_scores)
            total_score = individual_scores.sum()
            total_scores.append(total_score)
            if verbose:
                print(xlink_key, ":", xlink, "score:", total_score)

        # The builtin float replaces the np.float alias, which was removed
        # in NumPy 1.24; both denote the same dtype.
        re: XLinkScore = {
            'total': np.array(total_scores, dtype=float),
            'individual': scores
        }
        return re

    def __call__(
            self,
            rmf_file: str = '',
            xlink_file: str = '',
            frame_index: int = 0,
            hier: IMP.atom.Hierarchy = None,
            *args, **kwargs
    ):
        """Optionally reload the inputs, then compute the scores.

        :param rmf_file: when this is an existing file the obstacles are
            reloaded from it
        :param xlink_file: when this is an existing file the cross-link
            table is reloaded from it
        :param frame_index: forwarded to ``get_obstacles``
        :param hier: forwarded to ``get_obstacles``
        :param args: accepted but not used
        :param kwargs: forwarded to :meth:`compute_scores`
        :return: the result of :meth:`compute_scores`
        """
        if pathlib.Path(xlink_file).is_file():
            self.xlinks = IMP.bff.tools.read_xlink_table(
                fn=xlink_file
            )
        if pathlib.Path(rmf_file).is_file():
            self.obstacles = get_obstacles(
                rmf_file=rmf_file,
                hier=hier,
                model=self.model,
                frame_index=frame_index,
                name_map=self.name_map
            )
        return self.compute_scores(**kwargs)

    def get_score(self) -> float:
        """Return the sum of the total scores of all cross-links."""
        return float(np.sum(self()['total']))
203 
204 
def read_xlink_table
Read a xlink table.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Classes to handle different kinds of restraints.
The standard decorator for manipulating molecular structures.
Basic functionality that is expected to be used by a wide variety of IMP users.
Functionality for loading, creating, manipulating and scoring atomic structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
Bayesian Fluorescence Framework.