IMP logo
IMP Reference Guide  2.6.1
The Integrative Modeling Platform
xltable.py
1 """@namespace IMP.pmi.io.xltable
2  Tools to plot a contact map overlaid with cross-links.
3 """
4 
5 from __future__ import print_function
6 from math import sqrt
7 from scipy.spatial.distance import cdist
8 import numpy as np
9 import matplotlib as mpl
10 mpl.use('Agg')
11 import matplotlib.cm as cm
12 import matplotlib.pyplot as plt
13 
14 from collections import defaultdict
15 import pickle
16 import IMP
17 import IMP.atom
18 import IMP.rmf
19 import RMF
20 import IMP.pmi
21 import IMP.pmi.io
24 import IMP.pmi.topology
25 import IMP.pmi.analysis
26 class XLTable():
27  """ class to read, analyze, and plot xlink data on contact maps
28  Canonical way to read the data:
29  1) load sequences and name them
30  2) load coordinates for those sequences from PDB file
31  3) add crosslinks
32  4) create contact map
33  5) plot
34  """
35 
36  def __init__(self,contact_threshold):
37  self.sequence_dict={}
38  self.cross_link_db = None
39  self.residue_pair_list = [] # list of special residue pairs to display
40  self.distance_maps = [] # distance map for each copy of the complex
41  self.contact_freqs = None
42  self.num_pdbs = 0
43  self.num_rmfs = 0
44  self.index_dict = defaultdict(list) # location in the dmap of each residue
45  self.contact_threshold = contact_threshold
46  # internal things
47  self._first = True
48  self.index_dict={}
49  self.stored_dists={}
50  self.mdl = IMP.Model()
51 
52  def _colormap_distance(self, dist, threshold=35, tolerance=0):
53  if dist < threshold - tolerance:
54  return "Green"
55  elif dist >= threshold + tolerance:
56  return "Orange"
57  else:
58  return "Red"
59 
60  def _colormap_satisfaction(self, sat, threshold=0.5, tolerance=0.1):
61  if sat >= threshold + tolerance:
62  print(sat, "green")
63  return "Green"
64  elif sat < threshold + tolerance and sat >= threshold - tolerance :
65  print(sat, "orange")
66  return "Orange"
67  else:
68  print(sat, "orange")
69  return "Orange"
70 
71  def _get_percentage_satisfaction(self,r1,c1,r2,c2,threshold=35):
72  try:
73  idx1=self.index_dict[c1][r1]
74  except:
75  return None
76  try:
77  idx2=self.index_dict[c2][r2]
78  except:
79  return None
80  nsatisfied=0
81  for dists in self.dist_maps:
82  dist=dists[idx1,idx2]
83  if dist<threshold: nsatisfied+=1
84  return float(nsatisfied)/len(self.dist_maps)
85 
86  def _get_distance(self,r1,c1,r2,c2):
87  if self.index_dict is not None:
88  try:
89  idx1=self.index_dict[c1][r1]
90  except:
91  return None
92  try:
93  idx2=self.index_dict[c2][r2]
94  except:
95  return None
96  return self.av_dist_map[idx1,idx2]
97  else:
98  if (r1,c1,r2,c2) not in self.stored_dists.keys():
99  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
100  selpart=sel.get_selected_particles()
101  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
102  if len(selpart_res_one)>1: return None
103  if len(selpart_res_one)==0: return None
104  selpart_res_one_1=selpart_res_one[0]
105  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
106  selpart=sel.get_selected_particles()
107  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
108  if len(selpart_res_one)>1: return None
109  if len(selpart_res_one)==0: return None
110  selpart_res_one_2=selpart_res_one[0]
111  d1=IMP.core.XYZ(selpart_res_one_1)
112  d2=IMP.core.XYZ(selpart_res_one_2)
113  dist=IMP.core.get_distance(d1,d2)
114  self.stored_dists[(r1,c1,r2,c2)]=dist
115  else:
116  dist=self.stored_dists[(r1,c1,r2,c2)]
117  return dist
118 
119 
120  def _get_distance_and_particle_pair(self,r1,c1,r2,c2):
121  '''more robust and slower version of above'''
122  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
123  selpart=sel.get_selected_particles()
124  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
125  if len(selpart_res_one)>1: return None
126  if len(selpart_res_one)==0: return None
127  selpart_res_one_1=selpart_res_one[0]
128  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
129  selpart=sel.get_selected_particles()
130  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
131  if len(selpart_res_one)>1: return None
132  if len(selpart_res_one)==0: return None
133  selpart_res_one_2=selpart_res_one[0]
134  d1=IMP.core.XYZ(selpart_res_one_1)
135  d2=IMP.core.XYZ(selpart_res_one_2)
136  dist=IMP.core.get_distance(d1,d2)
137  return (dist,selpart_res_one_1,selpart_res_one_2)
138 
139  def _internal_load_maps(self,maps_fn):
140  npzfile = np.load(maps_fn)
141  cname_array=npzfile['cname_array']
142  idx_array=npzfile['idx_array']
143  index_dict={}
144  for cname,idxs in zip(cname_array,idx_array):
145  tmp=list(idxs)
146  if -1 in tmp:
147  index_dict[cname]=tmp[0:tmp.index(-1)]
148  else:
149  index_dict[cname]=tmp
150  av_dist_map = npzfile['av_dist_map']
151  contact_map = npzfile['contact_map']
152  return index_dict,av_dist_map,contact_map
153 
154  def load_sequence_from_fasta_file(self,fasta_file,id_in_fasta_file,protein_name):
155  """ read sequence. structures are displayed in the same order as sequences are read.
156  fasta_file: file to read
157  id_in_fasta_file: id of desired sequence
158  protein_name: identifier for this sequence (use same name when handling coordinates)
159  can provide the fasta name (for retrieval) and the protein name (for storage) """
160 
161  record_dict = IMP.pmi.topology.Sequences(fasta_file)
162  if id_in_fasta_file is None:
163  id_in_fasta_file = name
164  if id_in_fasta_file not in record_dict:
165  raise KeyError("id %s not found in fasta file" % id_in_fasta_file)
166  length = len(record_dict[id_in_fasta_file])
167 
168  self.sequence_dict[protein_name] = str(record_dict[id_in_fasta_file])
169 
170  def load_pdb_coordinates(self,pdbfile,chain_to_name_map):
171  """ read coordinates from a pdb file. also appends to distance maps
172  @param pdbfile file for reading coords
173  @param chain_to_name_map correspond chain ID with protein name (will ONLY read these chains)
174  Key: PDB chain ID. Value: Protein name (set in sequence reading)
175  \note This function returns an error if the sequence for each chain has NOT been read
176  """
177  mh = IMP.atom.read_pdb(pdbfile,self.mdl,IMP.atom.CAlphaPDBSelector())
178  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
179  coords = np.ones((total_len,3)) * 1e5 #default to coords "very far away"
180  prev_stop = 0
181  for cid in chain_to_name_map:
182  cname = chain_to_name_map[cid]
183  if cname not in self.sequence_dict:
184  print("ERROR: chain",cname,'has not been read or has a naming mismatch')
185  return
186  if self._first:
187  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
188  sel = IMP.atom.Selection(mh,chain_id=cid)
189  for p in sel.get_selected_particles():
191  coords[rnum+prev_stop-1,:] = list(IMP.core.XYZ(p).get_coordinates())
192  prev_stop+=len(self.sequence_dict[cname])
193  dists = cdist(coords, coords)
194  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
195  if self._first:
196  self.dist_maps= [dists]
197  self.av_dist_map = dists
198  self.contact_freqs = binary_dists
199  self._first=False
200  else:
201  self.dist_maps.append(dists)
202  self.av_dist_map += dists
203  self.contact_freqs += binary_dists
204  IMP.atom.destroy(mh)
205  del mh
206  self.num_pdbs+=1
207 
208  def load_rmf_coordinates(self,rmf_name,rmf_frame_index, chain_names, nomap=False):
209  """ read coordinates from a rmf file. It needs IMP to run.
210  rmf has been created using IMP.pmi conventions. It gets the
211  highest resolution automatically. Also appends to distance maps
212  @param rmf_name file for reading coords
213  @param rmf_frame_index frame index from the rmf
214  @param nomap Default False, if True it will not calculate the contact map
215  """
216  (particles_resolution_one, prots)=self._get_rmf_structure(rmf_name,rmf_frame_index)
217 
218  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
219  print(self.sequence_dict,total_len)
220 
221  coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
222  prev_stop=0
223  sorted_particles=IMP.pmi.tools.sort_by_residues(particles_resolution_one)
224 
225  self.prots=prots
226  self.particles_resolution_one=particles_resolution_one
227 
228  if nomap:
229  return
230 
231  for cname in chain_names:
232  print(cname)
233  if self._first:
234  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
235  rindexes=range(1,len(self.sequence_dict[cname])+1)
236  for rnum in rindexes:
237  sel=IMP.atom.Selection(prots,molecule=cname,residue_index=rnum)
238  selpart=sel.get_selected_particles()
239  selpart_res_one=list(set(particles_resolution_one) & set(selpart))
240  if len(selpart_res_one)>1: continue
241  if len(selpart_res_one)==0: continue
242  selpart_res_one=selpart_res_one[0]
243  try:
244  coords[rnum+prev_stop-1,:]=IMP.core.XYZ(selpart_res_one).get_coordinates()
245  except IndexError:
246  print("Error: exceed max size",prev_stop,total_len,cname,rnum)
247  exit()
248  prev_stop+=len(self.sequence_dict[cname])
249  dists = cdist(coords, coords)
250  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
251  if self._first:
252  self.dist_maps= [dists]
253  self.av_dist_map = dists
254  self.contact_freqs = binary_dists
255  self._first=False
256  else:
257  self.dist_maps.append(dists)
258  self.av_dist_map += dists
259  self.contact_freqs += binary_dists
260  self.num_rmfs+=1
261 
262 
263  def _get_rmf_structure(self,rmf_name,rmf_frame_index):
264  rh= RMF.open_rmf_file_read_only(rmf_name)
265  prots=IMP.rmf.create_hierarchies(rh, self.mdl)
266  IMP.rmf.load_frame(rh, rmf_frame_index)
267  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
268  del rh
269 
271 
272  protein_names=particle_dict.keys()
273  particles_resolution_one=[]
274  for k in particle_dict:
275  particles_resolution_one+=(particle_dict[k])
276 
277  return particles_resolution_one, prots
278 
279 
280  def save_maps(self,maps_fn):
281  maxlen=max(len(self.index_dict[key]) for key in self.index_dict)
282  cnames=[]
283  idxs=[]
284  for cname,idx in self.index_dict.iteritems():
285  cnames.append(cname)
286  idxs.append(idx+[-1]*(maxlen-len(idx)))
287  idx_array=np.array(idxs)
288  cname_array=np.array(cnames)
289  np.savez(maps_fn,
290  cname_array=cname_array,
291  idx_array=idx_array,
292  av_dist_map=self.av_dist_map,
293  contact_map=self.contact_freqs)
294 
295  def load_maps(self,maps_fn):
296  self.index_dict,self.av_dist_map,self.contact_freqs=self._internal_load_maps(maps_fn)
297 
298  def load_crosslinks(self,CrossLinkDataBase,distance_field=None):
299  """ read crosslinks from a CSV file.
300  provide a CrossLinkDataBaseKeywordsConverter to explain the columns
301  @distance_field is the optional keyword for the distance to be read form the file.
302  This can skip the rmf reading to calculate the distance of cross-links if
303  already provided in the csv file."""
304  if type(CrossLinkDataBase) is not IMP.pmi.io.crosslink.CrossLinkDataBase:
305  raise TypeError("Crosslink database must be a IMP.pmi.io.CrossLinkDataBase type")
306  self.cross_link_db=CrossLinkDataBase
307  if distance_field is not None:
308  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
309  zeros = np.zeros((total_len,3))
310  self.av_dist_map = cdist(zeros,zeros)
311  for xl in self.cross_link_db:
312  c1=xl[self.cross_link_db.protein1_key]
313  c2=xl[self.cross_link_db.protein2_key]
314  r1=xl[self.cross_link_db.residue1_key]
315  r2=xl[self.cross_link_db.residue2_key]
316  try:
317  self.stored_dists[(r1,c1,r2,c2)]=float(xl[distance_field])
318  self.stored_dists[(r2,c2,r1,c1)]=float(xl[distance_field])
319  except ValueError:
320  self.stored_dists[(r1,c1,r2,c2)]=10e6
321  self.stored_dists[(r2,c2,r1,c1)]=10e6
322 
323  def set_residue_pairs_to_display(self,residue_type_pair):
324  """ select the atom names of residue pairs to plot on the contact map
325  list of residues types must be single letter code
326  e.g. residue_type_pair=("K","K")
327  """
328  rtp=sorted(residue_type_pair)
329  for prot1 in self.sequence_dict:
330  seq1=self.sequence_dict[prot1]
331  for nres1,res1 in enumerate(seq1):
332  for prot2 in self.sequence_dict:
333  seq2=self.sequence_dict[prot2]
334  for nres2,res2 in enumerate(seq2):
335  if sorted((res1,res2))==rtp:
336  self.residue_pair_list.append((nres1+1,prot1,nres2+1,prot2))
337 
338  def setup_contact_map(self):
339  """ loop through each distance map and get frequency of contacts
340  """
341  if self.num_pdbs!=0 and self.num_rmfs==0:
342  self.av_dist_map = 1.0/self.num_pdbs * self.av_dist_map
343  self.contact_freqs = 1.0/self.num_pdbs * self.contact_freqs
344  if self.num_pdbs==0 and self.num_rmfs!=0:
345  self.av_dist_map = 1.0/self.num_rmfs * self.av_dist_map
346  self.contact_freqs = 1.0/self.num_rmfs * self.contact_freqs
347 
348  def setup_difference_map(self,maps_fn1,maps_fn2,thresh):
349  idx1,av1,contact1=self._internal_load_maps(maps_fn1)
350  idx2,av2,contact2=self._internal_load_maps(maps_fn2)
351  if idx1!=idx2:
352  print("UH OH: index dictionaries do not match!")
353  exit()
354  self.index_dict=idx1
355  self.av_dist_map=av1 # should we store both somehow? only needed for XL
356 
357  def logic(c1,c2):
358  if c1==0 and c2==0: # white
359  return 0
360  elif c1>thresh and c2<thresh: # red
361  return 1
362  elif c1<thresh and c2>thresh: # blue
363  return 2
364  else: # green
365  return 3
366  f = np.vectorize(logic,otypes=[np.int])
367  print('computing contact map')
368  self.contact_freqs = f(contact1,contact2)
369  print('done')
370 
371 
372 
373  def spring_layout(self,ax,plt,data, annotations, iterations = 100, k=1):
374 
375  import networkx as nx
376  import numpy
377  import scipy.spatial
378 
379 
380  #import matplotlib.pyplot as plt
381 
382  """
383  Author: G. Bouvier, Pasteur Institute, Paris
384  Website: http://bloggb.fr/2015/10/19/spring_layout_for_annotating_plot_in_matplotlib.html
385  - data: coordinates of your points [(x1,y1), (x2,y2), ..., (xn,yn)]
386  - annotations: text for your annotation ['str_1', 'str_2', ..., 'str_n']
387  - iterations: number of iterations for spring layout
388  - k: optimal distance between nodes
389  """
390  G = nx.Graph()
391  init_pos = {} # initial position
392  x, y = [e[0] for e in data], [e[1] for e in data]
393  xmin, xmax = min(x), max(x)
394  ymin, ymax = min(y), max(y)
395 
396  m=IMP.Model()
397  particles={}
398  particles_fixed={}
399  mvs = []
400 
401  rs = IMP.RestraintSet(m, 'distance')
402 
403  for i,xy in enumerate(data):
404  p=IMP.Particle(m)
405  G.add_node(i)
406  init_pos[i] = xy
408  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
409  IMP.core.XYZR(p).set_radius(100)
410  IMP.core.XYZ(p).set_coordinates_are_optimized(True)
411  particles[i]=p
412  Xfloatkey = IMP.core.XYZ.get_xyz_keys()[0]
413  Yfloatkey = IMP.core.XYZ.get_xyz_keys()[1]
414  mvs.append(IMP.core.NormalMover([p], [Xfloatkey], 5))
415  mvs.append(IMP.core.NormalMover([p], [Yfloatkey], 5))
416  p=IMP.Particle(m)
418  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
419  IMP.core.XYZR(p).set_radius(100)
420  particles_fixed[i]=p
421  ts1 = IMP.core.HarmonicUpperBound(200, 0.001)
422  rs.add_restraint(
424  particles[i],
425  particles_fixed[i]))
426 
427  ssps = IMP.core.SoftSpherePairScore(1.0)
429 
430  ps = particles.values()+particles_fixed.values()
431 
432  lsa.add(IMP.get_indexes(ps))
433 
435  cpc = IMP.container.ClosePairContainer(lsa, 0.0, rbcpf, 10.0)
436  evr = IMP.container.PairsRestraint(ssps, cpc)
437  rs.add_restraint(evr)
438 
439  '''
440  #G.add_node(text)
441  #G.add_edge(xy, text)
442  #init_pos[text] = xy
443 
444  delTri = scipy.spatial.Delaunay(data)
445  #plt.scatter([e[0] for e in data], [e[1] for e in data])
446  # create a set for edges that are indexes of the points
447  edges = set()
448  # for each Delaunay triangle
449  for n in xrange(delTri.nsimplex):
450  # for each edge of the triangle
451  # sort the vertices
452  # (sorting avoids duplicated edges being added to the set)
453  # and add to the edges set
454  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,1]])
455  edges.add((edge[0], edge[1]))
456  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,2]])
457  edges.add((edge[0], edge[1]))
458  edge = sorted([delTri.vertices[n,1], delTri.vertices[n,2]])
459  edges.add((edge[0], edge[1]))
460 
461 
462  for e in edges:
463  G.add_edge(e[0],e[1])
464 
465  d1=IMP.core.XYZ(particles[e[0]])
466  d2=IMP.core.XYZ(particles[e[1]])
467  dist=IMP.core.get_distance(d1,d2)
468  ts1 = IMP.core.Harmonic(dist+150, 0.001)
469  rs.add_restraint(
470  IMP.core.DistanceRestraint(m, ts1,
471  particles[e[0]],
472  particles[e[1]]))
473  '''
474 
475  smv = IMP.core.SerialMover(mvs)
476 
477  mc = IMP.core.MonteCarlo(m)
478  mc.set_scoring_function(rs)
479  mc.set_return_best(False)
480  mc.set_kt(1.0)
481  mc.add_mover(smv)
482 
483 
484  '''
485  pos = nx.spring_layout(G ,pos=init_pos)
486  x_list=[]
487  y_list=[]
488  for i,xy in enumerate(data):
489  xynew = pos[i] * [xmax-xmin, ymax-ymin] + [xmin, ymin]
490  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
491  x_list.append(xynew[0])
492  y_list.append(xynew[1])
493  #plt.annotate(name, xy, size=5, xycoords='data', xytext=xytext, textcoords='data', bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),\
494  # arrowprops=dict(arrowstyle="-", connectionstyle="arc3", color='gray'))
495 
496  points=ax.scatter(x_list,y_list,alpha=1)
497  #pos*=[numpy.ptp([e[0] for e in data]), numpy.ptp([e[1] for e in data])]
498  #plt.show()
499  return points
500  '''
501 
502  for i in range(10):
503  mc.optimize(1000*len(particles.keys())*2)
504  print(rs.evaluate(False))
505  x_list=[]
506  y_list=[]
507  for i,xy in enumerate(data):
508  p=particles[i]
509  coord=IMP.core.XYZ(p).get_coordinates()
510  xynew = (coord[0],coord[1])
511  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
512  x_list.append(xynew[0])
513  y_list.append(xynew[1])
514  points=ax.scatter(x_list,y_list,alpha=1,facecolors='none', edgecolors='k')
515  return points
516 
517 
518 
519  def show_mpld3(self,fig,ax,points,xl_list,xl_labels):
520  import mpld3
521  from mpld3 import plugins
522  import pandas as pd
523 
524  # Define some CSS to control our custom labels
525  css = """
526  table
527  {
528  border-collapse: collapse;
529  }
530  th
531  {
532  color: #000000;
533  background-color: #ffffff;
534  }
535  td
536  {
537  background-color: #cccccc;
538  }
539  table, th, td
540  {
541  font-family:Arial, Helvetica, sans-serif;
542  border: 1px solid black;
543  text-align: right;
544  font-size: 10px;
545  }
546  """
547  df = pd.DataFrame(index=xl_labels)
548 
549  sorted_keys=sorted(xl_list[0].keys())
550 
551  for k in sorted_keys:
552  df[k] = np.array([xl[k] for xl in xl_list])
553 
554  labels = []
555  for i in range(len(xl_labels)):
556  label = df.ix[[i], :].T
557  # .to_html() is unicode; so make leading 'u' go away with str()
558  labels.append(str(label.to_html()))
559 
560  tooltip = plugins.PointHTMLTooltip(points, labels,
561  voffset=10, hoffset=10, css=css)
562  plugins.connect(fig, tooltip)
563  mpld3.save_html(fig,"output.html")
564 
565  def save_xl_distances(self,filename):
566 
567  import csv
568 
569  data=[]
570  sorted_ids=None
571  sorted_group_ids=sorted(self.cross_link_db.data_base.keys())
572  for group in sorted_group_ids:
573  group_block=[]
574  group_dists=[]
575  for xl in self.cross_link_db.data_base[group]:
576  if not sorted_ids:
577  sorted_ids=sorted(xl.keys())
578  data.append(sorted_ids+["UniqueID","Distance","MinAmbiguousDistance"])
579  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
580  mdist=self._get_distance(r1,c1,r2,c2)
581  values=[xl[k] for k in sorted_ids]
582  values+=[group,mdist]
583  group_dists.append(mdist)
584  group_block.append(values)
585  for l in group_block:
586  l.append(min(group_dists))
587  data+=group_block
588 
589  with open(filename, 'w') as fp:
590  a = csv.writer(fp, delimiter=',')
591  a.writerows(data)
592 
593  def save_rmf_snapshot(self,filename):
594 
595  sorted_ids=None
596  sorted_group_ids=sorted(self.cross_link_db.data_base.keys())
597  list_of_pairs=[]
598  for group in sorted_group_ids:
599  group_xls=[]
600  group_dists_particles=[]
601  for xl in self.cross_link_db.data_base[group]:
602  xllabel=self.cross_link_db.get_short_cross_link_string(xl)
603  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
604  try:
605  (mdist,p1,p2)=self._get_distance_and_particle_pair(r1,c1,r2,c2)
606  except TypeError:
607  continue
608  group_dists_particles.append((mdist,p1,p2,xllabel))
609  if group_dists_particles:
610  (minmdist,minp1,minp2,minxllabel)=min(group_dists_particles, key = lambda t: t[0])
611  list_of_pairs.append((minp1,minp2,xllabel))
612  else:
613  continue
614 
615  m=self.prots[0].get_model()
616  linear = IMP.core.Linear(0, 0.0)
617  linear.set_slope(1.0)
618  dps2 = IMP.core.DistancePairScore(linear)
619  rslin = IMP.RestraintSet(m, 'linear_dummy_restraints')
620 
621  for pair in list_of_pairs:
622  pr = IMP.core.PairRestraint(m, dps2, (pair[0], pair[1]))
623  pr.set_name(pair[2])
624  rslin.add_restraint(pr)
625 
626  rh = RMF.create_rmf_file(filename)
627  IMP.rmf.add_hierarchies(rh, self.prots)
628  IMP.rmf.add_restraints(rh,[rslin])
630  del rh
631 
632 
633  def plot_table(self, prot_listx=None,
634  prot_listy=None,
635  no_dist_info=False,
636  confidence_info=False,
637  filter=None,
638  display_residue_pairs=False,
639  contactmap=False,
640  filename=None,
641  confidence_classes=None,
642  alphablend=0.1,
643  scale_symbol_size=1.0,
644  gap_between_components=0,
645  dictionary_of_gaps={},
646  colormap=cm.binary,
647  crosslink_threshold=None,
648  colornorm=None,
649  cbar_labels=None,
650  labels=False,
651  show_mpld3=False,
652  color_crosslinks_by_distance=True):
653  """ plot the xlink table with optional contact map.
654  prot_listx: list of protein names on the x-axis
655  prot_listy: list of protein names on the y-axis
656  no_dist_info: plot only the cross-links as grey spots
657  confidence_info:
658  filter: list of tuples to filter on. each one contains:
659  keyword in the database to be filtered on
660  relationship ">","==","<"
661  a value
662  example ("ID_Score",">",40)
663  display_residue_pairs: display all pairs defined in self.residue_pair_list
664  contactmap: display the contact map
665  filename: save to file (adds .pdf extension)
666  confidence_classes:
667  alphablend:
668  scale_symbol_size: rescale the symbol for the crosslink
669  gap_between_components: the numbeber of residues to leave blannk between each component
670  dictionary_of_gaps: add extra space after the given protein. dictionary_of_gaps={prot_name:extra_gap}
671  """
672  # prepare figure
673 
674 
675  fig = plt.figure(figsize=(100, 100))
676  ax = fig.add_subplot(111)
677  ax.set_xticks([])
678  ax.set_yticks([])
679 
680  if cbar_labels is not None:
681  if len(cbar_labels)!=4:
682  print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
683  exit()
684  # set the list of proteins on the x axis
685  if prot_listx is None:
686  prot_listx = self.sequence_dict.keys()
687  prot_listx.sort()
688  nresx = gap_between_components + \
689  sum([len(self.sequence_dict[name])
690  + gap_between_components for name in prot_listx]) + \
691  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
692 
693  # set the list of proteins on the y axis
694  if prot_listy is None:
695  prot_listy = self.sequence_dict.keys()
696  prot_listy.sort()
697  nresy = gap_between_components + \
698  sum([len(self.sequence_dict[name])
699  + gap_between_components for name in prot_listy]) + \
700  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
701 
702  # this is the residue offset for each protein
703  resoffsetx = {}
704  resendx = {}
705  res = gap_between_components
706  for prot in prot_listx:
707  resoffsetx[prot] = res
708  res += len(self.sequence_dict[prot])
709  resendx[prot] = res
710  res += gap_between_components
711  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
712 
713  resoffsety = {}
714  resendy = {}
715  res = gap_between_components
716  for prot in prot_listy:
717  resoffsety[prot] = res
718  res += len(self.sequence_dict[prot])
719  resendy[prot] = res
720  res += gap_between_components
721  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
722 
723  resoffsetdiagonal = {}
724  res = gap_between_components
725  for prot in IMP.pmi.io.utilities.OrderedSet(prot_listx + prot_listy):
726  resoffsetdiagonal[prot] = res
727  res += len(self.sequence_dict[prot])
728  res += gap_between_components
729  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
730 
731  # plot protein boundaries
732  xticks = []
733  xlabels = []
734  for n, prot in enumerate(prot_listx):
735  res = resoffsetx[prot]
736  end = resendx[prot]
737  for proty in prot_listy:
738  resy = resoffsety[proty]
739  endy = resendy[proty]
740  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
741  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
742  xticks.append((float(res) + float(end)) / 2)
743  xlabels.append(prot)
744 
745  yticks = []
746  ylabels = []
747  for n, prot in enumerate(prot_listy):
748  res = resoffsety[prot]
749  end = resendy[prot]
750  for protx in prot_listx:
751  resx = resoffsetx[protx]
752  endx = resendx[protx]
753  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
754  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
755  yticks.append((float(res) + float(end)) / 2)
756  ylabels.append(prot)
757 
758  # plot the contact map
759  if contactmap:
760  tmp_array = np.zeros((nresx, nresy))
761  for px in prot_listx:
762  for py in prot_listy:
763  resx = resoffsetx[px]
764  lengx = resendx[px] - 1
765  resy = resoffsety[py]
766  lengy = resendy[py] - 1
767  indexes_x = self.index_dict[px]
768  minx = min(indexes_x)
769  maxx = max(indexes_x)
770  indexes_y = self.index_dict[py]
771  miny = min(indexes_y)
772  maxy = max(indexes_y)
773  tmp_array[resx:lengx,resy:lengy] = self.contact_freqs[minx:maxx,miny:maxy]
774 
775  cax = ax.imshow(tmp_array,
776  cmap=colormap,
777  norm=colornorm,
778  origin='lower',
779  alpha=0.6,
780  interpolation='nearest')
781 
782  ax.set_xticks(xticks)
783  ax.set_xticklabels(xlabels, rotation=90)
784  ax.set_yticks(yticks)
785  ax.set_yticklabels(ylabels)
786  plt.setp(ax.get_xticklabels(), fontsize=30)
787  plt.setp(ax.get_yticklabels(), fontsize=30)
788 
789  # set the crosslinks
790  already_added_xls = []
791  xl_coordinates_tuple_list = []
792  xl_labels = []
793  x_list=[]
794  y_list=[]
795  color_list=[]
796  xl_list=[]
797 
798  markersize = 5 * scale_symbol_size
799  for xl in self.cross_link_db:
800 
801  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
802  label=xl[self.cross_link_db.unique_sub_id_key]
803  if color_crosslinks_by_distance:
804 
805  try:
806  mdist=self._get_distance(r1,c1,r2,c2)
807  if mdist is None: continue
808  color = self._colormap_distance(mdist,threshold=crosslink_threshold)
809  except KeyError:
810  color="gray"
811 
812  else:
813 
814  try:
815  ps=self._get_percentage_satisfaction(r1,c1,r2,c2)
816  if ps is None: continue
817  color = self._colormap_satisfaction(ps,threshold=0.2,tolerance=0.1)
818  except KeyError:
819  color="gray"
820 
821  try:
822  pos1 = r1 + resoffsetx[c1]
823  except:
824  continue
825  try:
826  pos2 = r2 + resoffsety[c2]
827  except:
828  continue
829 
830 
831  # everything below is used for plotting the diagonal
832  # when you have a rectangolar plots
833  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
834  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
835  if confidence_info:
836  if confidence == '0.01':
837  markersize = 14 * scale_symbol_size
838  elif confidence == '0.05':
839  markersize = 9 * scale_symbol_size
840  elif confidence == '0.1':
841  markersize = 6 * scale_symbol_size
842  else:
843  markersize = 15 * scale_symbol_size
844  else:
845  markersize = 5 * scale_symbol_size
846  '''
847  ax.plot([pos1],
848  [pos2],
849  'o',
850  c=color,
851  alpha=alphablend,
852  markersize=markersize)
853 
854  ax.plot([pos2],
855  [pos1],
856  'o',
857  c=color,
858  alpha=alphablend,
859  markersize=markersize)
860  '''
861  x_list.append(pos1)
862  x_list.append(pos2)
863  y_list.append(pos2)
864  y_list.append(pos1)
865  color_list.append(color)
866  color_list.append(color)
867  xl["Distance"]=mdist
868  xl_list.append(xl)
869  xl_list.append(xl)
870 
871  xl_labels.append(label)
872  xl_coordinates_tuple_list.append((float(pos1),float(pos2)))
873  xl_labels.append(label+"*")
874  xl_coordinates_tuple_list.append((float(pos2),float(pos1)))
875 
876  points=ax.scatter(x_list,y_list,s=markersize,c=color_list,alpha=alphablend)
877 
878  # plot requested residue pairs
879  if display_residue_pairs:
880  for rp in self.residue_pair_list:
881  r1=rp[0]
882  c1=rp[1]
883  r2=rp[2]
884  c2=rp[3]
885 
886  try:
887  dist=self._get_distance(r1,c1,r2,c2)
888  except:
889  continue
890 
891  if dist<=40.0:
892  print(rp)
893  try:
894  pos1 = r1 + resoffsetx[c1]
895  except:
896  continue
897  try:
898  pos2 = r2 + resoffsety[c2]
899  except:
900  continue
901 
902  ax.plot([pos1],
903  [pos2],
904  '+',
905  c="blue",
906  alpha=0.1,
907  markersize=markersize)
908 
909  # display and write to file
910  fig.set_size_inches(0.002 * nresx, 0.002 * nresy)
911  [i.set_linewidth(2.0) for i in ax.spines.itervalues()]
912  if cbar_labels is not None:
913  cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
914  cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
915 
916  # set the labels using spring layout
917  if labels:
918  points=self.spring_layout(ax,plt,xl_coordinates_tuple_list, xl_labels)
919 
920  if filename:
921  #plt.show()
922  plt.savefig(filename, dpi=300,transparent="False")
923 
924 
925  if show_mpld3:
926  self.show_mpld3(fig,ax,points,xl_list,xl_labels)
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
Utility classes and functions for IO.
Definition: utilities.py:1
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
Upper bound harmonic function (non-zero when feature > mean)
class to read, analyze, and plot xlink data on contact maps Canonical way to read the data: 1) load s...
Definition: xltable.py:26
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def plot_table
plot the xlink table with optional contact map.
Definition: xltable.py:633
def load_rmf_coordinates
read coordinates from a rmf file.
Definition: xltable.py:208
Return all close unordered pairs of particles taken from the SingletonContainer.
def load_pdb_coordinates
read coordinates from a pdb file.
Definition: xltable.py:170
Distance restraint between two particles.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:87
void read_pdb(TextInput input, int model, Hierarchy h)
Object used to hold a set of restraints.
Definition: RestraintSet.h:36
def setup_contact_map
loop through each distance map and get frequency of contacts
Definition: xltable.py:338
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def load_sequence_from_fasta_file
read sequence.
Definition: xltable.py:154
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def set_residue_pairs_to_display
select the atom names of residue pairs to plot on the contact map list of residues types must be sing...
Definition: xltable.py:323
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
def load_crosslinks
read crosslinks from a CSV file.
Definition: xltable.py:298
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
Linear function
Definition: Linear.h:19
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual model particles.
Definition: Particle.h:37
Applies a PairScore to a Pair.
Definition: PairRestraint.h:29
Select all CA ATOM records.
Definition: pdb.h:77
static const FloatKeys & get_xyz_keys()
Get a vector containing the keys for x,y,z.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
Applies a list of movers one at a time.
Definition: SerialMover.h:23
Applies a PairScore to each Pair in a list.
Perform more efficient close pair finding when rigid bodies are involved.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27