IMP logo
IMP Reference Guide  2.7.0
The Integrative Modeling Platform
xltable.py
1 """@namespace IMP.pmi.io.xltable
2  Tools to plot a contact map overlaid with cross-links.
3 """
4 
5 from __future__ import print_function
6 from math import sqrt
7 from scipy.spatial.distance import cdist
8 import numpy as np
9 import matplotlib as mpl
10 mpl.use('Agg')
11 import matplotlib.cm as cm
12 import matplotlib.pyplot as plt
13 
14 from collections import defaultdict
15 import pickle
16 import IMP
17 import IMP.atom
18 import IMP.rmf
19 import RMF
20 import IMP.pmi
21 import IMP.pmi.io
24 import IMP.pmi.topology
25 import IMP.pmi.analysis
26 class XLTable():
27  """ class to read, analyze, and plot xlink data on contact maps
28  Canonical way to read the data:
29  1) load sequences and name them
30  2) load coordinates for those sequences from PDB file
31  3) add crosslinks
32  4) create contact map
33  5) plot
34  """
35 
36  def __init__(self,contact_threshold):
37  self.sequence_dict={}
38  self.cross_link_db = None
39  self.residue_pair_list = [] # list of special residue pairs to display
40  self.distance_maps = [] # distance map for each copy of the complex
41  self.contact_freqs = None
42  self.num_pdbs = 0
43  self.num_rmfs = 0
44  self.index_dict = defaultdict(list) # location in the dmap of each residue
45  self.contact_threshold = contact_threshold
46  # internal things
47  self._first = True
48  self.index_dict={}
49  self.stored_dists={}
50  self.mdl = IMP.Model()
51 
52  def _colormap_distance(self, dist, threshold=35, tolerance=0):
53  if dist < threshold - tolerance:
54  return "Green"
55  elif dist >= threshold + tolerance:
56  return "Orange"
57  else:
58  return "Red"
59 
60  def _colormap_satisfaction(self, sat, threshold=0.5, tolerance=0.1):
61  if sat >= threshold + tolerance:
62  print(sat, "green")
63  return "Green"
64  elif sat < threshold + tolerance and sat >= threshold - tolerance :
65  print(sat, "orange")
66  return "Orange"
67  else:
68  print(sat, "orange")
69  return "Orange"
70 
71  def _get_percentage_satisfaction(self,r1,c1,r2,c2,threshold=35):
72  try:
73  idx1=self.index_dict[c1][r1]
74  except:
75  return None
76  try:
77  idx2=self.index_dict[c2][r2]
78  except:
79  return None
80  nsatisfied=0
81  for dists in self.dist_maps:
82  dist=dists[idx1,idx2]
83  if dist<threshold: nsatisfied+=1
84  return float(nsatisfied)/len(self.dist_maps)
85 
86  def _get_distance(self,r1,c1,r2,c2):
87  if self.index_dict is not None:
88  try:
89  idx1=self.index_dict[c1][r1]
90  except:
91  return None
92  try:
93  idx2=self.index_dict[c2][r2]
94  except:
95  return None
96  return self.av_dist_map[idx1,idx2]
97  else:
98  if (r1,c1,r2,c2) not in self.stored_dists.keys():
99  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
100  selpart=sel.get_selected_particles()
101  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
102  if len(selpart_res_one)>1: return None
103  if len(selpart_res_one)==0: return None
104  selpart_res_one_1=selpart_res_one[0]
105  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
106  selpart=sel.get_selected_particles()
107  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
108  if len(selpart_res_one)>1: return None
109  if len(selpart_res_one)==0: return None
110  selpart_res_one_2=selpart_res_one[0]
111  d1=IMP.core.XYZ(selpart_res_one_1)
112  d2=IMP.core.XYZ(selpart_res_one_2)
113  dist=IMP.core.get_distance(d1,d2)
114  self.stored_dists[(r1,c1,r2,c2)]=dist
115  else:
116  dist=self.stored_dists[(r1,c1,r2,c2)]
117  return dist
118 
119 
120  def _get_distance_and_particle_pair(self,r1,c1,r2,c2):
121  '''more robust and slower version of above'''
122  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1,resolution=1)
123  selpart_1=sel.get_selected_particles()
124  #selpart_res_one_1=list(set(self.particles_resolution_one) & set(selpart))
125  if len(selpart_1)==0: return None
126  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2,resolution=1)
127  selpart_2=sel.get_selected_particles()
128  #selpart_res_one_2=list(set(self.particles_resolution_one) & set(selpart))
129  if len(selpart_2)==0: return None
130  mindist=None
131  minparticlepair=None
132  for p1 in selpart_1:
133  for p2 in selpart_2:
134  if p1 == p2: continue
135  d1=IMP.core.XYZ(p1)
136  d2=IMP.core.XYZ(p2)
137  dist=IMP.core.get_distance(d1,d2)
138  if mindist is None or mindist>dist:
139  mindist=dist
140  minparticlepair=(p1,p2)
141  return (mindist,minparticlepair[0],minparticlepair[1])
142 
143  def _internal_load_maps(self,maps_fn):
144  npzfile = np.load(maps_fn)
145  cname_array=npzfile['cname_array']
146  idx_array=npzfile['idx_array']
147  index_dict={}
148  for cname,idxs in zip(cname_array,idx_array):
149  tmp=list(idxs)
150  if -1 in tmp:
151  index_dict[cname]=tmp[0:tmp.index(-1)]
152  else:
153  index_dict[cname]=tmp
154  av_dist_map = npzfile['av_dist_map']
155  contact_map = npzfile['contact_map']
156  return index_dict,av_dist_map,contact_map
157 
158  def load_sequence_from_fasta_file(self,fasta_file,id_in_fasta_file,protein_name):
159  """ read sequence. structures are displayed in the same order as sequences are read.
160  fasta_file: file to read
161  id_in_fasta_file: id of desired sequence
162  protein_name: identifier for this sequence (use same name when handling coordinates)
163  can provide the fasta name (for retrieval) and the protein name (for storage) """
164 
165  record_dict = IMP.pmi.topology.Sequences(fasta_file)
166  if id_in_fasta_file is None:
167  id_in_fasta_file = name
168  if id_in_fasta_file not in record_dict:
169  raise KeyError("id %s not found in fasta file" % id_in_fasta_file)
170  length = len(record_dict[id_in_fasta_file])
171 
172  self.sequence_dict[protein_name] = str(record_dict[id_in_fasta_file])
173 
174  def load_pdb_coordinates(self,pdbfile,chain_to_name_map):
175  """ read coordinates from a pdb file. also appends to distance maps
176  @param pdbfile file for reading coords
177  @param chain_to_name_map correspond chain ID with protein name (will ONLY read these chains)
178  Key: PDB chain ID. Value: Protein name (set in sequence reading)
179  \note This function returns an error if the sequence for each chain has NOT been read
180  """
181  mh = IMP.atom.read_pdb(pdbfile,self.mdl,IMP.atom.CAlphaPDBSelector())
182  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
183  coords = np.ones((total_len,3)) * 1e5 #default to coords "very far away"
184  prev_stop = 0
185  for cid in chain_to_name_map:
186  cname = chain_to_name_map[cid]
187  if cname not in self.sequence_dict:
188  print("ERROR: chain",cname,'has not been read or has a naming mismatch')
189  return
190  if self._first:
191  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
192  sel = IMP.atom.Selection(mh,chain_id=cid)
193  for p in sel.get_selected_particles():
195  coords[rnum+prev_stop-1,:] = list(IMP.core.XYZ(p).get_coordinates())
196  prev_stop+=len(self.sequence_dict[cname])
197  dists = cdist(coords, coords)
198  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
199  if self._first:
200  self.dist_maps= [dists]
201  self.av_dist_map = dists
202  self.contact_freqs = binary_dists
203  self._first=False
204  else:
205  self.dist_maps.append(dists)
206  self.av_dist_map += dists
207  self.contact_freqs += binary_dists
208  IMP.atom.destroy(mh)
209  del mh
210  self.num_pdbs+=1
211 
212  def load_rmf_coordinates(self,rmf_name,rmf_frame_index, chain_names, nomap=False):
213  """ read coordinates from a rmf file. It needs IMP to run.
214  rmf has been created using IMP.pmi conventions. It gets the
215  highest resolution automatically. Also appends to distance maps
216  @param rmf_name file for reading coords
217  @param rmf_frame_index frame index from the rmf
218  @param nomap Default False, if True it will not calculate the contact map
219  """
220  (particles_resolution_one, prots)=self._get_rmf_structure(rmf_name,rmf_frame_index)
221 
222  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
223  print(self.sequence_dict,total_len)
224 
225  coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
226  prev_stop=0
227  sorted_particles=IMP.pmi.tools.sort_by_residues(particles_resolution_one)
228 
229  self.prots=prots
230  self.particles_resolution_one=particles_resolution_one
231 
232  if nomap:
233  return
234 
235  for cname in chain_names:
236  print(cname)
237  if self._first:
238  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
239  rindexes=range(1,len(self.sequence_dict[cname])+1)
240  for rnum in rindexes:
241  sel=IMP.atom.Selection(prots,molecule=cname,residue_index=rnum)
242  selpart=sel.get_selected_particles()
243  selpart_res_one=list(set(particles_resolution_one) & set(selpart))
244  if len(selpart_res_one)>1: continue
245  if len(selpart_res_one)==0: continue
246  selpart_res_one=selpart_res_one[0]
247  try:
248  coords[rnum+prev_stop-1,:]=IMP.core.XYZ(selpart_res_one).get_coordinates()
249  except IndexError:
250  print("Error: exceed max size",prev_stop,total_len,cname,rnum)
251  exit()
252  prev_stop+=len(self.sequence_dict[cname])
253  dists = cdist(coords, coords)
254  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
255  if self._first:
256  self.dist_maps= [dists]
257  self.av_dist_map = dists
258  self.contact_freqs = binary_dists
259  self._first=False
260  else:
261  self.dist_maps.append(dists)
262  self.av_dist_map += dists
263  self.contact_freqs += binary_dists
264  self.num_rmfs+=1
265 
266 
267  def _get_rmf_structure(self,rmf_name,rmf_frame_index):
268  rh= RMF.open_rmf_file_read_only(rmf_name)
269  prots=IMP.rmf.create_hierarchies(rh, self.mdl)
270  IMP.rmf.load_frame(rh, rmf_frame_index)
271  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
272  del rh
273 
275 
276  protein_names=particle_dict.keys()
277  particles_resolution_one=[]
278  for k in particle_dict:
279  particles_resolution_one+=(particle_dict[k])
280 
281  return particles_resolution_one, prots
282 
283 
284  def save_maps(self,maps_fn):
285  maxlen=max(len(self.index_dict[key]) for key in self.index_dict)
286  cnames=[]
287  idxs=[]
288  for cname,idx in self.index_dict.iteritems():
289  cnames.append(cname)
290  idxs.append(idx+[-1]*(maxlen-len(idx)))
291  idx_array=np.array(idxs)
292  cname_array=np.array(cnames)
293  np.savez(maps_fn,
294  cname_array=cname_array,
295  idx_array=idx_array,
296  av_dist_map=self.av_dist_map,
297  contact_map=self.contact_freqs)
298 
299  def load_maps(self,maps_fn):
300  self.index_dict,self.av_dist_map,self.contact_freqs=self._internal_load_maps(maps_fn)
301 
302  def load_crosslinks(self,CrossLinkDataBase,distance_field=None):
303  """ read crosslinks from a CSV file.
304  provide a CrossLinkDataBaseKeywordsConverter to explain the columns
305  @distance_field is the optional keyword for the distance to be read form the file.
306  This can skip the rmf reading to calculate the distance of cross-links if
307  already provided in the csv file."""
308  if type(CrossLinkDataBase) is not IMP.pmi.io.crosslink.CrossLinkDataBase:
309  raise TypeError("Crosslink database must be a IMP.pmi.io.CrossLinkDataBase type")
310  self.cross_link_db=CrossLinkDataBase
311  if distance_field is not None:
312  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
313  zeros = np.zeros((total_len,3))
314  self.av_dist_map = cdist(zeros,zeros)
315  for xl in self.cross_link_db:
316  c1=xl[self.cross_link_db.protein1_key]
317  c2=xl[self.cross_link_db.protein2_key]
318  r1=xl[self.cross_link_db.residue1_key]
319  r2=xl[self.cross_link_db.residue2_key]
320  try:
321  self.stored_dists[(r1,c1,r2,c2)]=float(xl[distance_field])
322  self.stored_dists[(r2,c2,r1,c1)]=float(xl[distance_field])
323  except ValueError:
324  self.stored_dists[(r1,c1,r2,c2)]=10e6
325  self.stored_dists[(r2,c2,r1,c1)]=10e6
326  else:
327  for xl in self.cross_link_db:
328  c1=xl[self.cross_link_db.protein1_key]
329  c2=xl[self.cross_link_db.protein2_key]
330  r1=xl[self.cross_link_db.residue1_key]
331  r2=xl[self.cross_link_db.residue2_key]
332  self.stored_dists[(r1,c1,r2,c2)]=self._get_distance(r1,c1,r2,c2)
333  self.stored_dists[(r2,c2,r1,c1)]=self._get_distance(r2,c2,r1,c1)
334 
335  def set_residue_pairs_to_display(self,residue_type_pair):
336  """ select the atom names of residue pairs to plot on the contact map
337  list of residues types must be single letter code
338  e.g. residue_type_pair=("K","K")
339  """
340  rtp=sorted(residue_type_pair)
341  for prot1 in self.sequence_dict:
342  seq1=self.sequence_dict[prot1]
343  for nres1,res1 in enumerate(seq1):
344  for prot2 in self.sequence_dict:
345  seq2=self.sequence_dict[prot2]
346  for nres2,res2 in enumerate(seq2):
347  if sorted((res1,res2))==rtp:
348  self.residue_pair_list.append((nres1+1,prot1,nres2+1,prot2))
349 
350  def setup_contact_map(self):
351  """ loop through each distance map and get frequency of contacts
352  """
353  if self.num_pdbs!=0 and self.num_rmfs==0:
354  self.av_dist_map = 1.0/self.num_pdbs * self.av_dist_map
355  self.contact_freqs = 1.0/self.num_pdbs * self.contact_freqs
356  if self.num_pdbs==0 and self.num_rmfs!=0:
357  self.av_dist_map = 1.0/self.num_rmfs * self.av_dist_map
358  self.contact_freqs = 1.0/self.num_rmfs * self.contact_freqs
359 
360  def setup_difference_map(self,maps_fn1,maps_fn2,thresh):
361  idx1,av1,contact1=self._internal_load_maps(maps_fn1)
362  idx2,av2,contact2=self._internal_load_maps(maps_fn2)
363  if idx1!=idx2:
364  print("UH OH: index dictionaries do not match!")
365  exit()
366  self.index_dict=idx1
367  self.av_dist_map=av1 # should we store both somehow? only needed for XL
368 
369  def logic(c1,c2):
370  if c1==0 and c2==0: # white
371  return 0
372  elif c1>thresh and c2<thresh: # red
373  return 1
374  elif c1<thresh and c2>thresh: # blue
375  return 2
376  else: # green
377  return 3
378  f = np.vectorize(logic,otypes=[np.int])
379  print('computing contact map')
380  self.contact_freqs = f(contact1,contact2)
381  print('done')
382 
383 
384 
385  def spring_layout(self,ax,plt,data, annotations, iterations = 100, k=1):
386 
387  import networkx as nx
388  import numpy
389  import scipy.spatial
390 
391 
392  #import matplotlib.pyplot as plt
393 
394  """
395  Author: G. Bouvier, Pasteur Institute, Paris
396  Website: http://bloggb.fr/2015/10/19/spring_layout_for_annotating_plot_in_matplotlib.html
397  - data: coordinates of your points [(x1,y1), (x2,y2), ..., (xn,yn)]
398  - annotations: text for your annotation ['str_1', 'str_2', ..., 'str_n']
399  - iterations: number of iterations for spring layout
400  - k: optimal distance between nodes
401  """
402  G = nx.Graph()
403  init_pos = {} # initial position
404  x, y = [e[0] for e in data], [e[1] for e in data]
405  xmin, xmax = min(x), max(x)
406  ymin, ymax = min(y), max(y)
407 
408  m=IMP.Model()
409  particles={}
410  particles_fixed={}
411  mvs = []
412 
413  rs = IMP.RestraintSet(m, 'distance')
414 
415  for i,xy in enumerate(data):
416  p=IMP.Particle(m)
417  G.add_node(i)
418  init_pos[i] = xy
420  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
421  IMP.core.XYZR(p).set_radius(100)
422  IMP.core.XYZ(p).set_coordinates_are_optimized(True)
423  particles[i]=p
424  Xfloatkey = IMP.core.XYZ.get_xyz_keys()[0]
425  Yfloatkey = IMP.core.XYZ.get_xyz_keys()[1]
426  mvs.append(IMP.core.NormalMover([p], [Xfloatkey], 5))
427  mvs.append(IMP.core.NormalMover([p], [Yfloatkey], 5))
428  p=IMP.Particle(m)
430  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
431  IMP.core.XYZR(p).set_radius(100)
432  particles_fixed[i]=p
433  ts1 = IMP.core.HarmonicUpperBound(200, 0.001)
434  rs.add_restraint(
436  particles[i],
437  particles_fixed[i]))
438 
439  ssps = IMP.core.SoftSpherePairScore(1.0)
441 
442  ps = particles.values()+particles_fixed.values()
443 
444  lsa.add(IMP.get_indexes(ps))
445 
447  cpc = IMP.container.ClosePairContainer(lsa, 0.0, rbcpf, 10.0)
448  evr = IMP.container.PairsRestraint(ssps, cpc)
449  rs.add_restraint(evr)
450 
451  '''
452  #G.add_node(text)
453  #G.add_edge(xy, text)
454  #init_pos[text] = xy
455 
456  delTri = scipy.spatial.Delaunay(data)
457  #plt.scatter([e[0] for e in data], [e[1] for e in data])
458  # create a set for edges that are indexes of the points
459  edges = set()
460  # for each Delaunay triangle
461  for n in xrange(delTri.nsimplex):
462  # for each edge of the triangle
463  # sort the vertices
464  # (sorting avoids duplicated edges being added to the set)
465  # and add to the edges set
466  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,1]])
467  edges.add((edge[0], edge[1]))
468  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,2]])
469  edges.add((edge[0], edge[1]))
470  edge = sorted([delTri.vertices[n,1], delTri.vertices[n,2]])
471  edges.add((edge[0], edge[1]))
472 
473 
474  for e in edges:
475  G.add_edge(e[0],e[1])
476 
477  d1=IMP.core.XYZ(particles[e[0]])
478  d2=IMP.core.XYZ(particles[e[1]])
479  dist=IMP.core.get_distance(d1,d2)
480  ts1 = IMP.core.Harmonic(dist+150, 0.001)
481  rs.add_restraint(
482  IMP.core.DistanceRestraint(m, ts1,
483  particles[e[0]],
484  particles[e[1]]))
485  '''
486 
487  smv = IMP.core.SerialMover(mvs)
488 
489  mc = IMP.core.MonteCarlo(m)
490  mc.set_scoring_function(rs)
491  mc.set_return_best(False)
492  mc.set_kt(1.0)
493  mc.add_mover(smv)
494 
495 
496  '''
497  pos = nx.spring_layout(G ,pos=init_pos)
498  x_list=[]
499  y_list=[]
500  for i,xy in enumerate(data):
501  xynew = pos[i] * [xmax-xmin, ymax-ymin] + [xmin, ymin]
502  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
503  x_list.append(xynew[0])
504  y_list.append(xynew[1])
505  #plt.annotate(name, xy, size=5, xycoords='data', xytext=xytext, textcoords='data', bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),\
506  # arrowprops=dict(arrowstyle="-", connectionstyle="arc3", color='gray'))
507 
508  points=ax.scatter(x_list,y_list,alpha=1)
509  #pos*=[numpy.ptp([e[0] for e in data]), numpy.ptp([e[1] for e in data])]
510  #plt.show()
511  return points
512  '''
513 
514  for i in range(10):
515  mc.optimize(1000*len(particles.keys())*2)
516  print(rs.evaluate(False))
517  x_list=[]
518  y_list=[]
519  for i,xy in enumerate(data):
520  p=particles[i]
521  coord=IMP.core.XYZ(p).get_coordinates()
522  xynew = (coord[0],coord[1])
523  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
524  x_list.append(xynew[0])
525  y_list.append(xynew[1])
526  points=ax.scatter(x_list,y_list,alpha=1,facecolors='none', edgecolors='k')
527  return points
528 
529 
530 
531  def show_mpld3(self,fig,ax,points,xl_list,xl_labels):
532  import mpld3
533  from mpld3 import plugins
534  import pandas as pd
535 
536  # Define some CSS to control our custom labels
537  css = """
538  table
539  {
540  border-collapse: collapse;
541  }
542  th
543  {
544  color: #000000;
545  background-color: #ffffff;
546  }
547  td
548  {
549  background-color: #cccccc;
550  }
551  table, th, td
552  {
553  font-family:Arial, Helvetica, sans-serif;
554  border: 1px solid black;
555  text-align: right;
556  font-size: 10px;
557  }
558  """
559  df = pd.DataFrame(index=xl_labels)
560 
561  sorted_keys=sorted(xl_list[0].keys())
562 
563  for k in sorted_keys:
564  df[k] = np.array([xl[k] for xl in xl_list])
565 
566  labels = []
567  for i in range(len(xl_labels)):
568  label = df.ix[[i], :].T
569  # .to_html() is unicode; so make leading 'u' go away with str()
570  labels.append(str(label.to_html()))
571 
572  tooltip = plugins.PointHTMLTooltip(points, labels,
573  voffset=10, hoffset=10, css=css)
574  plugins.connect(fig, tooltip)
575  mpld3.save_html(fig,"output.html")
576 
577  def compute_distances(self):
578  data=[]
579  sorted_ids=None
580  sorted_group_ids=sorted(self.cross_link_db.data_base.keys())
581  for group in sorted_group_ids:
582  #group_block=[]
583  group_dists=[]
584  for xl in self.cross_link_db.data_base[group]:
585  if not sorted_ids:
586  sorted_ids=sorted(xl.keys())
587  data.append(sorted_ids+["UniqueID","Distance","MinAmbiguousDistance"])
588  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
589  try:
590  (mdist,p1,p2)=self._get_distance_and_particle_pair(r1,c1,r2,c2)
591  except:
592  mdist="None"
593  values=[xl[k] for k in sorted_ids]
594  values+=[group,mdist]
595  group_dists.append(mdist)
596  #group_block.append(values)
597  xl["Distance"]=mdist
598 
599  for xl in self.cross_link_db.data_base[group]:
600  xl["MinAmbiguousDistance"]=min(group_dists)
601  #for l in group_block:
602  # l.append(min(group_dists))
603 
604 
605  def save_rmf_snapshot(self,filename,color_id=None):
606  if color_id is None:
607  color_id=self.cross_link_db.id_score_key
608  sorted_ids=None
609  sorted_group_ids=sorted(self.cross_link_db.data_base.keys())
610  list_of_pairs=[]
611  color_scores=[]
612  for group in sorted_group_ids:
613  group_xls=[]
614  group_dists_particles=[]
615  for xl in self.cross_link_db.data_base[group]:
616  xllabel=self.cross_link_db.get_short_cross_link_string(xl)
617  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
618  print (c1,c2,r1,r2)
619  try:
620  (mdist,p1,p2)=self._get_distance_and_particle_pair(r1,c1,r2,c2)
621  except TypeError:
622  print("TypeError or missing chain/residue ",r1,c1,r2,c2)
623  continue
624  group_dists_particles.append((mdist,p1,p2,xllabel,float(xl[color_id])))
625  if group_dists_particles:
626  (minmdist,minp1,minp2,minxllabel,mincolor_score)=min(group_dists_particles, key = lambda t: t[0])
627  color_scores.append(mincolor_score)
628  list_of_pairs.append((minp1,minp2,minxllabel,mincolor_score))
629  else:
630  continue
631 
632 
633  m=self.prots[0].get_model()
634  linear = IMP.core.Linear(0, 0.0)
635  linear.set_slope(1.0)
636  dps2 = IMP.core.DistancePairScore(linear)
637  rslin = IMP.RestraintSet(m, 'linear_dummy_restraints')
638  sgs=[]
639  offset=min(color_scores)
640  maxvalue=max(color_scores)
641  for pair in list_of_pairs:
642  pr = IMP.core.PairRestraint(m, dps2, (pair[0], pair[1]))
643  pr.set_name(pair[2])
644  factor=(pair[3]-offset)/(maxvalue-offset)
645  print(factor)
646  c=IMP.display.get_rgb_color(factor)
647  seg=IMP.algebra.Segment3D(IMP.core.XYZ(pair[0]).get_coordinates(),IMP.core.XYZ(pair[1]).get_coordinates())
648  rslin.add_restraint(pr)
649  sgs.append(IMP.display.SegmentGeometry(seg,c,pair[2]))
650 
651  rh = RMF.create_rmf_file(filename)
652  IMP.rmf.add_hierarchies(rh, self.prots)
653  IMP.rmf.add_restraints(rh,[rslin])
654  IMP.rmf.add_geometries(rh, sgs)
656  del rh
657 
658  def get_residue_contacts(self,prot_listx,prot_listy):
659  for px in prot_listx:
660  for py in prot_listy:
661  indexes_x = self.index_dict[px]
662  minx = min(indexes_x)
663  maxx = max(indexes_x)
664  indexes_y = self.index_dict[py]
665  miny = min(indexes_y)
666  maxy = max(indexes_y)
667  array = self.contact_freqs[minx:maxx,miny:maxy]
668  (xresidues,yresidues)=np.where(array>0)
669  for n,xr in enumerate(xresidues):
670  print(xr,yresidues[n],px,py,array[xr,yresidues[n]])
671 
672 
673  def plot_table(self, prot_listx=None,
674  prot_listy=None,
675  no_dist_info=False,
676  confidence_info=False,
677  filter=None,
678  display_residue_pairs=False,
679  contactmap=False,
680  filename=None,
681  confidence_classes=None,
682  alphablend=0.1,
683  scale_symbol_size=1.0,
684  gap_between_components=0,
685  dictionary_of_gaps={},
686  colormap=cm.binary,
687  crosslink_threshold=None,
688  colornorm=None,
689  cbar_labels=None,
690  labels=False,
691  show_mpld3=False,
692  color_crosslinks_by_distance=True):
693  """ plot the xlink table with optional contact map.
694  prot_listx: list of protein names on the x-axis
695  prot_listy: list of protein names on the y-axis
696  no_dist_info: plot only the cross-links as grey spots
697  confidence_info:
698  filter: list of tuples to filter on. each one contains:
699  keyword in the database to be filtered on
700  relationship ">","==","<"
701  a value
702  example ("ID_Score",">",40)
703  display_residue_pairs: display all pairs defined in self.residue_pair_list
704  contactmap: display the contact map
705  filename: save to file (adds .pdf extension)
706  confidence_classes:
707  alphablend:
708  scale_symbol_size: rescale the symbol for the crosslink
709  gap_between_components: the numbeber of residues to leave blannk between each component
710  dictionary_of_gaps: add extra space after the given protein. dictionary_of_gaps={prot_name:extra_gap}
711  """
712  # prepare figure
713 
714 
715  fig = plt.figure(figsize=(100, 100))
716  ax = fig.add_subplot(111)
717  ax.set_xticks([])
718  ax.set_yticks([])
719 
720  if cbar_labels is not None:
721  if len(cbar_labels)!=4:
722  print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
723  exit()
724  # set the list of proteins on the x axis
725  if prot_listx is None:
726  prot_listx = self.sequence_dict.keys()
727  prot_listx.sort()
728  nresx = gap_between_components + \
729  sum([len(self.sequence_dict[name])
730  + gap_between_components for name in prot_listx]) + \
731  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
732 
733  # set the list of proteins on the y axis
734  if prot_listy is None:
735  prot_listy = self.sequence_dict.keys()
736  prot_listy.sort()
737  nresy = gap_between_components + \
738  sum([len(self.sequence_dict[name])
739  + gap_between_components for name in prot_listy]) + \
740  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
741 
742  # this is the residue offset for each protein
743  resoffsetx = {}
744  resendx = {}
745  res = gap_between_components
746  for prot in prot_listx:
747  resoffsetx[prot] = res
748  res += len(self.sequence_dict[prot])
749  resendx[prot] = res
750  res += gap_between_components
751  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
752 
753  resoffsety = {}
754  resendy = {}
755  res = gap_between_components
756  for prot in prot_listy:
757  resoffsety[prot] = res
758  res += len(self.sequence_dict[prot])
759  resendy[prot] = res
760  res += gap_between_components
761  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
762 
763  resoffsetdiagonal = {}
764  res = gap_between_components
765  for prot in IMP.pmi.io.utilities.OrderedSet(prot_listx + prot_listy):
766  resoffsetdiagonal[prot] = res
767  res += len(self.sequence_dict[prot])
768  res += gap_between_components
769  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
770 
771  # plot protein boundaries
772  xticks = []
773  xlabels = []
774  for n, prot in enumerate(prot_listx):
775  res = resoffsetx[prot]
776  end = resendx[prot]
777  for proty in prot_listy:
778  resy = resoffsety[proty]
779  endy = resendy[proty]
780  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
781  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
782  xticks.append((float(res) + float(end)) / 2)
783  xlabels.append(prot)
784 
785  yticks = []
786  ylabels = []
787  for n, prot in enumerate(prot_listy):
788  res = resoffsety[prot]
789  end = resendy[prot]
790  for protx in prot_listx:
791  resx = resoffsetx[protx]
792  endx = resendx[protx]
793  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
794  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
795  yticks.append((float(res) + float(end)) / 2)
796  ylabels.append(prot)
797 
798  # plot the contact map
799  if contactmap:
800  tmp_array = np.zeros((nresx, nresy))
801  for px in prot_listx:
802  for py in prot_listy:
803  resx = resoffsetx[px]
804  lengx = resendx[px] - 1
805  resy = resoffsety[py]
806  lengy = resendy[py] - 1
807  indexes_x = self.index_dict[px]
808  minx = min(indexes_x)
809  maxx = max(indexes_x)
810  indexes_y = self.index_dict[py]
811  miny = min(indexes_y)
812  maxy = max(indexes_y)
813  tmp_array[resx:lengx,resy:lengy] = self.contact_freqs[minx:maxx,miny:maxy]
814 
815  cax = ax.imshow(tmp_array,
816  cmap=colormap,
817  norm=colornorm,
818  origin='lower',
819  alpha=0.6,
820  interpolation='nearest')
821 
822  ax.set_xticks(xticks)
823  ax.set_xticklabels(xlabels, rotation=90)
824  ax.set_yticks(yticks)
825  ax.set_yticklabels(ylabels)
826  plt.setp(ax.get_xticklabels(), fontsize=6)
827  plt.setp(ax.get_yticklabels(), fontsize=6)
828 
829  # set the crosslinks
830  already_added_xls = []
831  xl_coordinates_tuple_list = []
832  xl_labels = []
833  x_list=[]
834  y_list=[]
835  color_list=[]
836  xl_list=[]
837 
838  markersize = 5 * scale_symbol_size
839  if self.cross_link_db:
840  for xl in self.cross_link_db:
841 
842  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
843  label=xl[self.cross_link_db.unique_sub_id_key]
844  if color_crosslinks_by_distance:
845 
846  try:
847  mdist=self._get_distance(r1,c1,r2,c2)
848  if mdist is None: continue
849  color = self._colormap_distance(mdist,threshold=crosslink_threshold)
850  except KeyError:
851  color="gray"
852 
853  else:
854 
855  try:
856  ps=self._get_percentage_satisfaction(r1,c1,r2,c2)
857  if ps is None: continue
858  color = self._colormap_satisfaction(ps,threshold=0.2,tolerance=0.1)
859  except KeyError:
860  color="gray"
861 
862  try:
863  pos1 = r1 + resoffsetx[c1]
864  except:
865  continue
866  try:
867  pos2 = r2 + resoffsety[c2]
868  except:
869  continue
870 
871 
872  # everything below is used for plotting the diagonal
873  # when you have a rectangolar plots
874  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
875  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
876  if confidence_info:
877  if confidence == '0.01':
878  markersize = 14 * scale_symbol_size
879  elif confidence == '0.05':
880  markersize = 9 * scale_symbol_size
881  elif confidence == '0.1':
882  markersize = 6 * scale_symbol_size
883  else:
884  markersize = 15 * scale_symbol_size
885  else:
886  markersize = 5 * scale_symbol_size
887  '''
888  ax.plot([pos1],
889  [pos2],
890  'o',
891  c=color,
892  alpha=alphablend,
893  markersize=markersize)
894 
895  ax.plot([pos2],
896  [pos1],
897  'o',
898  c=color,
899  alpha=alphablend,
900  markersize=markersize)
901  '''
902  x_list.append(pos1)
903  x_list.append(pos2)
904  y_list.append(pos2)
905  y_list.append(pos1)
906  color_list.append(color)
907  color_list.append(color)
908  xl["Distance"]=mdist
909  xl_list.append(xl)
910  xl_list.append(xl)
911 
912  xl_labels.append(label)
913  xl_coordinates_tuple_list.append((float(pos1),float(pos2)))
914  xl_labels.append(label+"*")
915  xl_coordinates_tuple_list.append((float(pos2),float(pos1)))
916 
917  points=ax.scatter(x_list,y_list,s=markersize,c=color_list,alpha=alphablend)
918 
919  # plot requested residue pairs
920  if display_residue_pairs:
921  for rp in self.residue_pair_list:
922  r1=rp[0]
923  c1=rp[1]
924  r2=rp[2]
925  c2=rp[3]
926 
927  try:
928  dist=self._get_distance(r1,c1,r2,c2)
929  except:
930  continue
931 
932  if dist<=40.0:
933  print(rp)
934  try:
935  pos1 = r1 + resoffsetx[c1]
936  except:
937  continue
938  try:
939  pos2 = r2 + resoffsety[c2]
940  except:
941  continue
942 
943  ax.plot([pos1],
944  [pos2],
945  '+',
946  c="blue",
947  alpha=0.1,
948  markersize=markersize)
949 
950  # display and write to file
951  fig.set_size_inches(0.002 * nresx, 0.002 * nresy)
952  [i.set_linewidth(2.0) for i in ax.spines.itervalues()]
953  if cbar_labels is not None:
954  cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
955  cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
956 
957  # set the labels using spring layout
958  if labels:
959  points=self.spring_layout(ax,plt,xl_coordinates_tuple_list, xl_labels)
960 
961  if filename:
962  #plt.show()
963  plt.savefig(filename, dpi=300,transparent="False")
964 
965 
966  if show_mpld3:
967  self.show_mpld3(fig,ax,points,xl_list,xl_labels)
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
Utility classes and functions for IO.
Definition: utilities.py:1
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
Upper bound harmonic function (non-zero when feature > mean)
class to read, analyze, and plot xlink data on contact maps Canonical way to read the data: 1) load s...
Definition: xltable.py:26
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def plot_table
plot the xlink table with optional contact map.
Definition: xltable.py:673
Color get_rgb_color(double f)
Return the color for f from the RGB color map.
def load_rmf_coordinates
read coordinates from a rmf file.
Definition: xltable.py:212
Return all close unordered pairs of particles taken from the SingletonContainer.
def load_pdb_coordinates
read coordinates from a pdb file.
Definition: xltable.py:174
Distance restraint between two particles.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Object used to hold a set of restraints.
Definition: RestraintSet.h:36
def setup_contact_map
loop through each distance map and get frequency of contacts
Definition: xltable.py:350
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def load_sequence_from_fasta_file
read sequence.
Definition: xltable.py:158
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def set_residue_pairs_to_display
select the atom names of residue pairs to plot on the contact map list of residues types must be sing...
Definition: xltable.py:335
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
def load_crosslinks
read crosslinks from a CSV file.
Definition: xltable.py:302
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
void add_geometries(RMF::NodeHandle parent, const display::GeometriesTemp &r)
Add geometries to a given parent node.
Linear function
Definition: Linear.h:19
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
Simple implementation of segments in 3D.
Definition: Segment3D.h:24
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
Select all CA ATOM records.
Definition: pdb.h:77
static const FloatKeys & get_xyz_keys()
Get a vector containing the keys for x,y,z.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
Applies a list of movers one at a time.
Definition: SerialMover.h:23
Applies a PairScore to each Pair in a list.
Perform more efficient close pair finding when rigid bodies are involved.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27