1 from __future__
import print_function
9 from collections
import defaultdict
11 from scipy.spatial.distance
import cdist
14 """A class to read RMF files and make a network contact map"""
15 def __init__(self,model,selections,cutoff,frequency_cutoff,
16 colors=
None,fixed=
None,pos=
None,proteomic_edges=
None,quantitative_proteomic_data=
None):
17 """Set up a new graphXL object
18 @param model The IMP model
19 @param selections A dictionary containing component names.
21 values are either moleculename or start,stop,moleculename
22 @param cutoff The distance cutoff
23 @param frequency_cutoff The frequency cutoff
24 @param colors A dictionary of colors (HEX code,values) for subunits (keywords)
25 @param fixed A list of subunits that are kept fixed
26 @param pos A dictionary with positions (tuple, values) of subunits (keywords)
27 @param proteomic_edges A list of edges to represent proteomic data
28 @param quantitative_proteomic_data A dictionary of edges to represent
29 quantitative proteomic data such as PE Scores, or genetic interactions
34 self.selections = selections
35 self.contact_counts={}
36 self.edges=defaultdict(int)
37 for (name1,name2)
in itertools.combinations(self.selections.keys(),2):
38 self.edges[tuple(sorted((name1,name2)))]=0
40 self.frequency_cutoff=frequency_cutoff
42 self.gcpf.set_distance(self.cutoff)
43 self.names = list(self.selections.keys())
47 self.proteomic_edges=proteomic_edges
48 self.quantitative_proteomic_data=quantitative_proteomic_data
52 """Add selections from an RMF file"""
53 print(
'reading from RMF file',rmf_fn)
54 rh = RMF.open_rmf_file_read_only(rmf_fn)
58 ps_per_component=defaultdict(list)
60 self.size_per_component=defaultdict(int)
65 all_particles_by_resolution = []
66 for name
in part_dict:
67 all_particles_by_resolution += part_dict[name]
69 for component_name
in self.selections:
70 for seg
in self.selections[component_name]:
73 elif type(seg) == tuple:
75 residue_indexes=range(seg[0], seg[1] + 1))
77 raise Exception(
'could not understand selection tuple '+str(seg))
78 parts = list(set(s.get_selected_particles()) & set(all_particles_by_resolution))
83 for n1,name1
in enumerate(self.names):
84 for name2
in self.names[n1+1:]:
85 ncontacts = len(self.gcpf.get_close_pairs(self.mdl,
86 ps_per_component[name1],
87 ps_per_component[name2]))
89 self.edges[tuple(sorted((name1,name2)))]+=1.0
93 def make_plot(self,groups,out_fn,quantitative_proteomic_data=False):
95 plot the interaction matrix
96 @param groups is the list of groups of domains, eg,
97 [["protA_1-10","prot1A_11-100"],["protB"]....]
98 it will plot a space between different groups
99 @param out_fn name of the plot file
100 @param quantitative_proteomic_data plot the quantitative proteomic data
103 import matplotlib.pyplot
as plt
104 from matplotlib
import cm
107 ax.set_aspect(
'equal',
'box')
108 ax.xaxis.set_major_locator(plt.NullLocator())
109 ax.yaxis.set_major_locator(plt.NullLocator())
118 xoffset=squaredistance
119 yoffset=squaredistance
125 for subgroup
in group:
128 for domain
in subgroup:
129 domain_xlocations[domain]=xoffset
130 domain_ylocations[domain]=yoffset
136 xoffset+=squaredistance
137 yoffset+=squaredistance
139 for edge,count
in self.edges.items():
141 if quantitative_proteomic_data:
143 maxqpd=max(self.quantitative_proteomic_data.values())
144 minqpd=min(self.quantitative_proteomic_data.values())
145 if edge
in self.quantitative_proteomic_data:
146 value=self.quantitative_proteomic_data[edge]
147 elif (edge[1],edge[0])
in self.quantitative_proteomic_data:
148 value=self.quantitative_proteomic_data[(edge[1],edge[0])]
152 density=(1.0-(value-minqpd)/(maxqpd-minqpd))
154 density=(1.0-float(count)/self.num_rmf)
155 color=(density,density,1.0)
156 x=domain_xlocations[edge[0]]
157 y=domain_ylocations[edge[1]]
158 if x>y: xtmp=y; ytmp=x; x=xtmp; y=ytmp
159 rect = plt.Rectangle([x - squaresize / 2, y - squaresize / 2], squaresize, squaresize,
160 facecolor=color, edgecolor=
'Gray', linewidth=0.1)
162 rect = plt.Rectangle([y - squaresize / 2, x - squaresize / 2], squaresize, squaresize,
163 facecolor=color, edgecolor=
'Gray', linewidth=0.1)
171 def make_graph(self,out_fn):
174 print(
'num edges',len(self.edges))
175 for edge,count
in self.edges.items():
177 if float(count)/self.num_rmf>self.frequency_cutoff:
180 weights.append(count)
181 for nw,w
in enumerate(weights):
182 weights[nw]=float(weights[nw])/max(weights)
183 IMP.pmi.output.draw_graph(edges,
184 node_size=dict(self.size_per_component),
185 node_color=self.colors,
192 validation_edges=self.proteomic_edges)
A class to read RMF files and make a network contact map.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def make_plot
Plot the interaction matrix.
def add_rmf
Add selections from an RMF file.
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def __init__
Set up a new graphXL object.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Tools for clustering and cluster analysis.
Find all nearby pairs by testing all pairs.
Classes for writing output files and processing them.
The general base class for IMP exceptions.
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.