IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
topology.py
1 from __future__ import print_function
2 import IMP
3 import RMF
4 import IMP.atom
5 import IMP.rmf
6 import IMP.pmi
7 import IMP.pmi.analysis
8 import IMP.pmi.output
9 from collections import defaultdict
10 import numpy as np
11 
12 class TopologyPlot(object):
13  """A class to read RMF files and make a network contact map"""
14  def __init__(self,model,selections,cutoff,frequency_cutoff,
15  colors=None,fixed=None,pos=None,proteomic_edges=None,quantitative_proteomic_data=None):
16  """Set up a new graphXL object
17  @param model The IMP model
18  @param selection_dict A dictionary containing component names.
19  Keys are labels
20  values are either moleculename or start,stop,moleculename
21  @param cutoff The distance cutoff
22  @param frequency_cutoff The frequency cutoff
23  @param colors A dictionary of colors (HEX code,values) for subunits (keywords)
24  @param fixed A list of subunits that are kept fixed
25  @param pos A dictionary with positions (tuple, values) of subunits (keywords)
26  @param proteomic_edges A list edges to represent proteomic data
27  @param quantitative_proteomic_data A dictionary of edges to represent
28  quantitative proteomic data such as PE Scores, or genetic interactions
29  """
30  import itertools\
31 
32  self.mdl = model
33  self.selections = selections
34  self.contact_counts={}
35  self.edges=defaultdict(int)
36  for (name1,name2) in itertools.combinations(self.selections.keys(),2):
37  self.edges[tuple(sorted((name1,name2)))]=0
38  self.cutoff = cutoff
39  self.frequency_cutoff=frequency_cutoff
40  self.gcpf = IMP.core.GridClosePairsFinder()
41  self.gcpf.set_distance(self.cutoff)
42  self.names = list(self.selections.keys())
43  self.colors=colors
44  self.fixed=fixed
45  self.pos=pos
46  self.proteomic_edges=proteomic_edges
47  self.quantitative_proteomic_data=quantitative_proteomic_data
48  self.num_rmf=0
49 
50  def add_rmf(self,rmf_fn,nframe):
51  """Add selections from an RMF file"""
52  print('reading from RMF file',rmf_fn)
53  rh = RMF.open_rmf_file_read_only(rmf_fn)
54  prots = IMP.rmf.create_hierarchies(rh, self.mdl)
55  hier = prots[0]
56  IMP.rmf.load_frame(rh, RMF.FrameID(0))
57  ps_per_component=defaultdict(list)
58  if self.num_rmf==0:
59  self.size_per_component=defaultdict(int)
60  self.mdl.update()
61 
62  #gathers particles for all components
64  all_particles_by_resolution = []
65  for name in part_dict:
66  all_particles_by_resolution += part_dict[name]
67 
68  for component_name in self.selections:
69  for seg in self.selections[component_name]:
70  if type(seg) == str:
71  s = IMP.atom.Selection(hier,molecule=seg)
72  elif type(seg) == tuple:
73  s = IMP.atom.Selection(hier,molecule=seg[2],
74  residue_indexes=range(seg[0], seg[1] + 1))
75  else:
76  raise Exception('could not understand selection tuple '+str(seg))
77  parts = list(set(s.get_selected_particles()) & set(all_particles_by_resolution))
78  ps_per_component[component_name] += IMP.get_indexes(parts)
79  if self.num_rmf==0:
80  self.size_per_component[component_name] += sum(len(IMP.pmi.tools.get_residue_indexes(p)) for p in parts)
81 
82  for n1,name1 in enumerate(self.names):
83  for name2 in self.names[n1+1:]:
84  ncontacts = len(self.gcpf.get_close_pairs(self.mdl,
85  ps_per_component[name1],
86  ps_per_component[name2]))
87  if ncontacts>0:
88  self.edges[tuple(sorted((name1,name2)))]+=1.0
89 
90  self.num_rmf+=1
91 
92  def make_plot(self,groups,out_fn,quantitative_proteomic_data=False):
93  '''
94  plot the interaction matrix
95  @param groups is the list of groups of domains, eg,
96  [["protA_1-10","prot1A_11-100"],["protB"]....]
97  it will plot a space between different groups
98  @param out_fn name of the plot file
99  @param quantitative_proteomic_data plot the quantitative proteomic data
100  '''
101  import numpy as np
102  import matplotlib.pyplot as plt
103  from matplotlib import cm
104 
105  ax=plt.gca()
106  ax.set_aspect('equal', 'box')
107  ax.xaxis.set_major_locator(plt.NullLocator())
108  ax.yaxis.set_major_locator(plt.NullLocator())
109 
110  largespace=0.6
111  smallspace=0.5
112  squaredistance=1.0
113  squaresize=0.99
114  domain_xlocations={}
115  domain_ylocations={}
116 
117  xoffset=squaredistance
118  yoffset=squaredistance
119  xlabels=[]
120  ylabels=[]
121  for group in groups:
122  xoffset+=largespace
123  yoffset+=largespace
124  for subgroup in group:
125  xoffset+=smallspace
126  yoffset+=smallspace
127  for domain in subgroup:
128  domain_xlocations[domain]=xoffset
129  domain_ylocations[domain]=yoffset
130  #rect = plt.Rectangle([xoffset- squaresize / 2, yoffset - squaresize / 2], squaresize, squaresize,
131  # facecolor=(1,1,1), edgecolor=(0.1,0.1,0.1))
132 
133  #ax.add_patch(rect)
134  #ax.text(xoffset , yoffset ,domain,horizontalalignment='left',verticalalignment='center',rotation=-45.0)
135  xoffset+=squaredistance
136  yoffset+=squaredistance
137 
138  for edge,count in self.edges.items():
139 
140  if quantitative_proteomic_data:
141  #normalize
142  maxqpd=max(self.quantitative_proteomic_data.values())
143  minqpd=min(self.quantitative_proteomic_data.values())
144  if edge in self.quantitative_proteomic_data:
145  value=self.quantitative_proteomic_data[edge]
146  elif (edge[1],edge[0]) in self.quantitative_proteomic_data:
147  value=self.quantitative_proteomic_data[(edge[1],edge[0])]
148  else:
149  value=0.0
150  print(minqpd,maxqpd)
151  density=(1.0-(value-minqpd)/(maxqpd-minqpd))
152  else:
153  density=(1.0-float(count)/self.num_rmf)
154  color=(density,density,1.0)
155  x=domain_xlocations[edge[0]]
156  y=domain_ylocations[edge[1]]
157  if x>y: xtmp=y; ytmp=x; x=xtmp; y=ytmp
158  rect = plt.Rectangle([x - squaresize / 2, y - squaresize / 2], squaresize, squaresize,
159  facecolor=color, edgecolor='Gray', linewidth=0.1)
160  ax.add_patch(rect)
161  rect = plt.Rectangle([y - squaresize / 2, x - squaresize / 2], squaresize, squaresize,
162  facecolor=color, edgecolor='Gray', linewidth=0.1)
163  ax.add_patch(rect)
164 
165  ax.autoscale_view()
166  plt.savefig(out_fn)
167  plt.show()
168  exit()
169 
170  def make_graph(self,out_fn):
171  edges=[]
172  weights=[]
173  print('num edges',len(self.edges))
174  for edge,count in self.edges.items():
175  # filter if frequency of contacts is greater than frequency_cutoff
176  if float(count)/self.num_rmf>self.frequency_cutoff:
177  print(count,edge)
178  edges.append(edge)
179  weights.append(count)
180  for nw,w in enumerate(weights):
181  weights[nw]=float(weights[nw])/max(weights)
182  IMP.pmi.output.draw_graph(edges,#node_size=1000,
183  node_size=dict(self.size_per_component),
184  node_color=self.colors,
185  fixed=self.fixed,
186  pos=self.pos,
187  edge_thickness=1, #weights,
188  edge_alpha=0.3,
189  edge_color='gray',
190  out_filename=out_fn,
191  validation_edges=self.proteomic_edges)
192 
193 
194 
195 def draw_component_composition(DegreesOfFreedom, max=1000, draw_pdb_names=False,show=True):
196  """A function to plot the representation on the sequence
197  @param DegreesOfFreedom input a IMP.pmi.dof.DegreesOfFreedom instance"""
198  import matplotlib as mpl
199  mpl.use('Agg')
200  from matplotlib import pyplot
201  from operator import itemgetter
202  import IMP.pmi.tools
203 
204  #first build the movers dictionary
205  movers_mols_res=IMP.pmi.tools.OrderedDict()
206  for mv in DegreesOfFreedom.movers_particles_map:
207  hs=DegreesOfFreedom.movers_particles_map[mv]
208  res=[]
209 
210  for h in hs:
212  res=[IMP.atom.Residue(h).get_index()]
214  res=IMP.atom.Fragment(h).get_residue_indexes()
215  if res:
216  is_molecule=False
217  hp=h
218  while not is_molecule:
219  hp=hp.get_parent()
220  is_molecule=IMP.atom.Molecule.get_is_setup(hp)
221  name=IMP.atom.Molecule(hp).get_name()+"."+str(IMP.atom.Copy(hp).get_copy_index())
222  if not mv in movers_mols_res:
223  movers_mols_res[mv]=IMP.pmi.tools.OrderedDict()
224  movers_mols_res[mv][name]=IMP.pmi.tools.Segments(res)
225  else:
226  if not name in movers_mols_res[mv]:
227  movers_mols_res[mv][name]=IMP.pmi.tools.Segments(res)
228  else:
229  movers_mols_res[mv][name].add(res)
230  # then get all movers by type
231  fb_movers=[]
232  rb_movers=[]
233  srb_movers=[]
234  for mv in movers_mols_res:
235  mvtype=None
236  if type(mv) is IMP.core.RigidBodyMover:
237  rb_movers.append(mv)
238  if type(mv) is IMP.core.BallMover:
239  fb_movers.append(mv)
240  if type(mv) is IMP.pmi.TransformMover:
241  srb_movers.append(mv)
242 
243  # now remove residue assigned to BallMovers from RigidBodies
244  for mv in fb_movers:
245  for mol in movers_mols_res[mv]:
246  for i in movers_mols_res[mv][mol].get_flatten():
247  for mv_rb in rb_movers:
248  try:
249  movers_mols_res[mv_rb][mol].remove(i)
250  except:
251  continue
252 
253  elements={}
254  c=IMP.pmi.tools.Colors()
255  colors=c.get_list_distant_colors()
256  colors=["blue", "red", "green", "pink", "cyan", "purple",
257  "magenta", "orange", "grey", "brown", "gold",
258  "khaki", "olive drab", "deep blue sky"]
259  rgb_colors=[(240,163,255),(0,117,220),
260  (153,63,0),(76,0,92),(25,25,25),
261  (0,92,49),(43,206,72),(255,204,153),
262  (128,128,128),(148,255,181),(143,124,0),
263  (157,204,0),(194,0,136),(0,51,128),(255,164,5),
264  (255,168,187),(66,102,0),(255,0,16),(94,241,242),
265  (0,153,143),(224,255,102),(116,10,255),(153,0,0),
266  (255,255,128),(255,255,0),(255,80,5)]
267  colors=[(float(c0)/255,float(c1)/255,float(c2)/255) for (c0,c1,c2) in rgb_colors]
268  mvrb_color={}
269  for mv in movers_mols_res:
270  mvtype=None
271  if type(mv) is IMP.core.RigidBodyMover:
272  mvtype="RB"
273  if not mv in mvrb_color:
274  color=colors[len(mvrb_color.keys())]
275  mvrb_color[mv]=color
276  for mol in movers_mols_res[mv]:
277  if not mol in elements: elements[mol]=[]
278  for seg in movers_mols_res[mv][mol].segs:
279  try:
280  elements[mol].append((seg[0],seg[-1]," ","pdb",mvrb_color[mv]))
281  except:
282  continue
283  if type(mv) is IMP.core.BallMover:
284  mvtype="FB"
285  for mol in movers_mols_res[mv]:
286  if not mol in elements: elements[mol]=[]
287  for seg in movers_mols_res[mv][mol].segs:
288  elements[mol].append((seg[0],seg[-1]," ","bead"))
289  if type(mv) is IMP.pmi.TransformMover:
290  mvtype="SRB"
291 
292  # sort everything
293  for mol in elements:
294  elements[mol].sort(key=lambda tup: tup[0])
295  elements[mol].append((elements[mol][-1][1],elements[mol][-1][1], " ", "end"))
296 
297  for name in elements:
298  k = name
299  tmplist = sorted(elements[name], key=itemgetter(0))
300  try:
301  endres = tmplist[-1][1]
302  except:
303  print("draw_component_composition: missing information for component %s" % name)
304  return
305  fig = pyplot.figure(figsize=(26.0 * float(endres) / max + 2, 2))
306  ax = fig.add_axes([0.05, 0.475, 0.9, 0.15])
307 
308  # Set the colormap and norm to correspond to the data for which
309  # the colorbar will be used.
310  cmap = mpl.cm.cool
311  norm = mpl.colors.Normalize(vmin=5, vmax=10)
312  bounds = [1]
313  colors = []
314 
315  for n, l in enumerate(tmplist):
316  firstres = l[0]
317  lastres = l[1]
318  if l[3] != "end":
319  if bounds[-1] != l[0]:
320  colors.append("white")
321  bounds.append(l[0])
322  if l[3] == "pdb":
323  colors.append("#99CCFF")
324  if l[3] == "bead":
325  colors.append("white")
326  if l[3] == "helix":
327  colors.append("#33CCCC")
328  if l[3] != "end":
329  bounds.append(l[1] + 1)
330  else:
331  if l[3] == "pdb":
332  colors.append(l[4])
333  if l[3] == "bead":
334  colors.append("white")
335  if l[3] == "helix":
336  colors.append("#33CCCC")
337  if l[3] != "end":
338  bounds.append(l[1] + 1)
339  else:
340  if bounds[-1] - 1 == l[0]:
341  bounds.pop()
342  bounds.append(l[0])
343  else:
344  colors.append("white")
345  bounds.append(l[0])
346 
347  bounds.append(bounds[-1])
348  colors.append("white")
349  cmap = mpl.colors.ListedColormap(colors)
350  cmap.set_over('0.25')
351  cmap.set_under('0.75')
352 
353  norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
354  cb2 = mpl.colorbar.ColorbarBase(ax, cmap=cmap,
355  norm=norm,
356  # to use 'extend', you must
357  # specify two extra boundaries:
358  boundaries=bounds,
359  ticks=bounds, # optional
360  spacing='proportional',
361  orientation='horizontal')
362 
363  extra_artists = []
364  npdb = 0
365 
366  if draw_pdb_names:
367  for l in tmplist:
368  if l[3] == "pdb":
369  npdb += 1
370  mid = 1.0 / endres * float(l[0])
371  # t =ax.text(mid, float(npdb-1)/2.0+1.5, l[2], ha="left", va="center", rotation=0,
372  # size=10)
373  # t=ax.annotate(l[0],2)
374  t = ax.annotate(
375  l[2], xy=(mid, 1), xycoords='axes fraction',
376  xytext=(mid + 0.025, float(npdb - 1) / 2.0 + 1.5), textcoords='axes fraction',
377  arrowprops=dict(arrowstyle="->",
378  connectionstyle="angle,angleA=0,angleB=90,rad=10"),
379  )
380  extra_artists.append(t)
381 
382  # set the title of the bar
383  title = ax.text(-0.005, 0.5, k, ha="right", va="center", rotation=90,
384  size=20)
385 
386  extra_artists.append(title)
387  # changing the xticks labels
388  labels = len(bounds) * [" "]
389  ax.set_xticklabels(labels)
390  mid = 1.0 / endres * float(bounds[0])
391  t = ax.annotate(bounds[0], xy=(mid, 0), xycoords='axes fraction',
392  xytext=(mid - 0.01, -0.5), textcoords='axes fraction',)
393  extra_artists.append(t)
394  offsets = [0, -0.5, -1.0]
395  nclashes = 0
396  for n in range(1, len(bounds)):
397  if bounds[n] == bounds[n - 1]:
398  continue
399  mid = 1.0 / endres * float(bounds[n])
400  if (float(bounds[n]) - float(bounds[n - 1])) / max <= 0.01:
401  nclashes += 1
402  offset = offsets[nclashes % 3]
403  else:
404  nclashes = 0
405  offset = offsets[0]
406  if offset > -0.75:
407  t = ax.annotate(
408  bounds[n], xy=(mid, 0), xycoords='axes fraction',
409  xytext=(mid, -0.5 + offset), textcoords='axes fraction')
410  else:
411  t = ax.annotate(
412  bounds[n], xy=(mid, 0), xycoords='axes fraction',
413  xytext=(mid, -0.5 + offset), textcoords='axes fraction', arrowprops=dict(arrowstyle="-"))
414  extra_artists.append(t)
415 
416  cb2.add_lines(bounds, ["black"] * len(bounds), [1] * len(bounds))
417  # cb2.set_label(k)
418 
419  pyplot.savefig(
420  k + "structure.pdf")
421  #transparent="True")
422  #dpi=150,
423  #bbox_extra_artists=(extra_artists),
424  #bbox_inches='tight')
425  if show:
426  pyplot.show()
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Molecule.h:35
A class to read RMF files and make a network contact map.
Definition: topology.py:12
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Residue.h:155
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Miscellaneous utilities.
Definition: tools.py:1
Modify the transformation of a rigid body.
def make_plot
plot the interaction matrix
Definition: topology.py:92
This class stores integers in ordered compact lists eg: [[1,2,3],[6,7,8]] the methods help splitting ...
Definition: tools.py:1172
Move continuous particle variables by perturbing them within a ball.
def add_rmf
Add selections from an RMF file.
Definition: topology.py:50
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def __init__
Set up a new graphXL object.
Definition: topology.py:26
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Modify the transformation of a rigid body.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Find all nearby pairs by testing all pairs.
Classes for writing output files and processing them.
Definition: output.py:1
A decorator for a residue.
Definition: Residue.h:134
The general base class for IMP exceptions.
Definition: exception.h:49
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
A decorator for a molecule.
Definition: Molecule.h:24
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1037