IMP  2.4.0
The Integrative Modeling Platform
topology.py
1 from __future__ import print_function
2 import IMP
3 import RMF
4 import IMP.atom
5 import IMP.rmf
6 import IMP.pmi
7 import IMP.pmi.analysis
8 import IMP.pmi.output
9 from collections import defaultdict
10 import numpy as np
11 from scipy.spatial.distance import cdist
12 
13 class TopologyPlot(object):
14  """A class to read RMF files and make a network contact map"""
15  def __init__(self,model,selections,cutoff,frequency_cutoff,
16  colors=None,fixed=None,pos=None,proteomic_edges=None,quantitative_proteomic_data=None):
17  """Set up a new graphXL object
18  @param model The IMP model
19  @param selection_dict A dictionary containing component names.
20  Keys are labels
21  values are either moleculename or start,stop,moleculename
22  @param cutoff The distance cutoff
23  @param frequency_cutoff The frequency cutoff
24  @param colors A dictionary of colors (HEX code,values) for subunits (keywords)
25  @param fixed A list of subunits that are kept fixed
26  @param pos A dictionary with positions (tuple, values) of subunits (keywords)
27  @param proteomic_edges A list edges to represent proteomic data
28  @param quantitative_proteomic_data A dictionary of edges to represent
29  quantitative proteomic data such as PE Scores, or genetic interactions
30  """
31  import itertools\
32 
33  self.mdl = model
34  self.selections = selections
35  self.contact_counts={}
36  self.edges=defaultdict(int)
37  for (name1,name2) in itertools.combinations(self.selections.keys(),2):
38  self.edges[tuple(sorted((name1,name2)))]=0
39  self.cutoff = cutoff
40  self.frequency_cutoff=frequency_cutoff
41  self.gcpf = IMP.core.GridClosePairsFinder()
42  self.gcpf.set_distance(self.cutoff)
43  self.names = list(self.selections.keys())
44  self.colors=colors
45  self.fixed=fixed
46  self.pos=pos
47  self.proteomic_edges=proteomic_edges
48  self.quantitative_proteomic_data=quantitative_proteomic_data
49  self.num_rmf=0
50 
51  def add_rmf(self,rmf_fn,nframe):
52  """Add selections from an RMF file"""
53  print('reading from RMF file',rmf_fn)
54  rh = RMF.open_rmf_file_read_only(rmf_fn)
55  prots = IMP.rmf.create_hierarchies(rh, self.mdl)
56  hier = prots[0]
57  IMP.rmf.load_frame(rh,0)
58  ps_per_component=defaultdict(list)
59  if self.num_rmf==0:
60  self.size_per_component=defaultdict(int)
61  self.mdl.update()
62 
63  #gathers particles for all components
65  all_particles_by_resolution = []
66  for name in part_dict:
67  all_particles_by_resolution += part_dict[name]
68 
69  for component_name in self.selections:
70  for seg in self.selections[component_name]:
71  if type(seg) == str:
72  s = IMP.atom.Selection(hier,molecule=seg)
73  elif type(seg) == tuple:
74  s = IMP.atom.Selection(hier,molecule=seg[2],
75  residue_indexes=range(seg[0], seg[1] + 1))
76  else:
77  raise Exception('could not understand selection tuple '+str(seg))
78  parts = list(set(s.get_selected_particles()) & set(all_particles_by_resolution))
79  ps_per_component[component_name] += IMP.get_indexes(parts)
80  if self.num_rmf==0:
81  self.size_per_component[component_name] += sum(len(IMP.pmi.tools.get_residue_indexes(p)) for p in parts)
82 
83  for n1,name1 in enumerate(self.names):
84  for name2 in self.names[n1+1:]:
85  ncontacts = len(self.gcpf.get_close_pairs(self.mdl,
86  ps_per_component[name1],
87  ps_per_component[name2]))
88  if ncontacts>0:
89  self.edges[tuple(sorted((name1,name2)))]+=1.0
90 
91  self.num_rmf+=1
92 
93  def make_plot(self,groups,out_fn,quantitative_proteomic_data=False):
94  '''
95  plot the interaction matrix
96  @param groups is the list of groups of domains, eg,
97  [["protA_1-10","prot1A_11-100"],["protB"]....]
98  it will plot a space between different groups
99  @param quantitative_proteomic_data plot the quantitative proteomic data
100  '''
101  import numpy as np
102  import matplotlib.pyplot as plt
103  from matplotlib import cm
104 
105  ax=plt.gca()
106  ax.set_aspect('equal', 'box')
107  ax.xaxis.set_major_locator(plt.NullLocator())
108  ax.yaxis.set_major_locator(plt.NullLocator())
109 
110  largespace=0.6
111  smallspace=0.5
112  squaredistance=1.0
113  squaresize=0.99
114  domain_xlocations={}
115  domain_ylocations={}
116 
117  xoffset=squaredistance
118  yoffset=squaredistance
119  xlabels=[]
120  ylabels=[]
121  for group in groups:
122  xoffset+=largespace
123  yoffset+=largespace
124  for subgroup in group:
125  xoffset+=smallspace
126  yoffset+=smallspace
127  for domain in subgroup:
128  domain_xlocations[domain]=xoffset
129  domain_ylocations[domain]=yoffset
130  #rect = plt.Rectangle([xoffset- squaresize / 2, yoffset - squaresize / 2], squaresize, squaresize,
131  # facecolor=(1,1,1), edgecolor=(0.1,0.1,0.1))
132 
133  #ax.add_patch(rect)
134  #ax.text(xoffset , yoffset ,domain,horizontalalignment='left',verticalalignment='center',rotation=-45.0)
135  xoffset+=squaredistance
136  yoffset+=squaredistance
137 
138  for edge,count in self.edges.items():
139 
140  if quantitative_proteomic_data:
141  #normalize
142  maxqpd=max(self.quantitative_proteomic_data.values())
143  minqpd=min(self.quantitative_proteomic_data.values())
144  if edge in self.quantitative_proteomic_data:
145  value=self.quantitative_proteomic_data[edge]
146  elif (edge[1],edge[0]) in self.quantitative_proteomic_data:
147  value=self.quantitative_proteomic_data[(edge[1],edge[0])]
148  else:
149  value=0.0
150  print(minqpd,maxqpd)
151  density=(1.0-(value-minqpd)/(maxqpd-minqpd))
152  else:
153  density=(1.0-float(count)/self.num_rmf)
154  color=(density,density,1.0)
155  x=domain_xlocations[edge[0]]
156  y=domain_ylocations[edge[1]]
157  if x>y: xtmp=y; ytmp=x; x=xtmp; y=ytmp
158  rect = plt.Rectangle([x - squaresize / 2, y - squaresize / 2], squaresize, squaresize,
159  facecolor=color, edgecolor='Gray', linewidth=0.1)
160  ax.add_patch(rect)
161  rect = plt.Rectangle([y - squaresize / 2, x - squaresize / 2], squaresize, squaresize,
162  facecolor=color, edgecolor='Gray', linewidth=0.1)
163  ax.add_patch(rect)
164 
165  ax.autoscale_view()
166  plt.savefig(out_fn)
167  plt.show()
168  exit()
169 
170  def make_graph(self,out_fn):
171  edges=[]
172  weights=[]
173  print('num edges',len(self.edges))
174  for edge,count in self.edges.items():
175  # filter if frequency of contacts is greater than frequency_cutoff
176  if float(count)/self.num_rmf>self.frequency_cutoff:
177  print(count,edge)
178  edges.append(edge)
179  weights.append(count)
180  for nw,w in enumerate(weights):
181  weights[nw]=float(weights[nw])/max(weights)
182  IMP.pmi.output.draw_graph(edges,#node_size=1000,
183  node_size=dict(self.size_per_component),
184  node_color=self.colors,
185  fixed=self.fixed,
186  pos=self.pos,
187  edge_thickness=1, #weights,
188  edge_alpha=0.3,
189  edge_color='gray',
190  out_filename=out_fn,
191  validation_edges=self.proteomic_edges)
A class to read RMF files and make a network contact map.
Definition: topology.py:13
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, kernel::Model *m)
def make_plot
plot the interaction matrix
Definition: topology.py:93
void load_frame(RMF::FileConstHandle file, unsigned int frame)
Definition: frames.h:27
def add_rmf
Add selections from an RMF file.
Definition: topology.py:51
def __init__
Set up a new graphXL object.
Definition: topology.py:27
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Find all nearby pairs by testing all pairs.
Classes for writing output files and processing them.
Definition: output.py:1
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:62
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:930