IMP logo
IMP Reference Guide  2.9.0
The Integrative Modeling Platform
xltable.py
1 """@namespace IMP.pmi.io.xltable
2  Tools to plot a contact map overlaid with cross-links.
3 """
4 
5 from __future__ import print_function
6 from math import sqrt
7 from scipy.spatial.distance import cdist
8 import numpy as np
9 import matplotlib as mpl
10 mpl.use('Agg')
11 import matplotlib.cm as cm
12 import matplotlib.pyplot as plt
13 
14 from collections import defaultdict
15 import pickle
16 import IMP
17 import IMP.atom
18 import IMP.rmf
19 import RMF
20 import IMP.pmi
21 import IMP.pmi.io
24 import IMP.pmi.topology
25 import IMP.pmi.analysis
26 import operator
27 
28 @IMP.deprecated_object("2.8", "Use PMI2, IMP.pmi.macros.AnalysisReplicaExchange, instead")
29 class XLTable():
30  """ class to read, analyze, and plot xlink data on contact maps
31  Canonical way to read the data:
32  1) load sequences and name them
33  2) load coordinates for those sequences from PDB file
34  3) add crosslinks
35  4) create contact map
36  5) plot
37  """
38 
39  def __init__(self,contact_threshold):
40  self.sequence_dict={}
41  self.cross_link_db = None
42  self.residue_pair_list = [] # list of special residue pairs to display
43  self.distance_maps = [] # distance map for each copy of the complex
44  self.contact_freqs = None
45  self.num_pdbs = 0
46  self.num_rmfs = 0
47  self.index_dict = defaultdict(list) # location in the dmap of each residue
48  self.contact_threshold = contact_threshold
49  # internal things
50  self._first = True
51  self.index_dict={}
52  self.stored_dists={}
53  self.mdl = IMP.Model()
54 
55  def _colormap_distance(self, dist, threshold=35, tolerance=0):
56  if dist < threshold - tolerance:
57  return "Green"
58  elif dist >= threshold + tolerance:
59  return "Orange"
60  else:
61  return "Red"
62 
63  def _colormap_satisfaction(self, sat, threshold=0.5, tolerance=0.1):
64  if sat >= threshold + tolerance:
65  print(sat, "green")
66  return "Green"
67  elif sat < threshold + tolerance and sat >= threshold - tolerance :
68  print(sat, "orange")
69  return "Orange"
70  else:
71  print(sat, "orange")
72  return "Orange"
73 
74  def _get_percentage_satisfaction(self,r1,c1,r2,c2,threshold=35):
75  try:
76  idx1=self.index_dict[c1][r1]
77  except:
78  return None
79  try:
80  idx2=self.index_dict[c2][r2]
81  except:
82  return None
83  nsatisfied=0
84  for dists in self.dist_maps:
85  dist=dists[idx1,idx2]
86  if dist<threshold: nsatisfied+=1
87  return float(nsatisfied)/len(self.dist_maps)
88 
89  def _get_distance(self,r1,c1,r2,c2):
90  if self.index_dict is not None:
91  try:
92  idx1=self.index_dict[c1][r1]
93  except:
94  return None
95  try:
96  idx2=self.index_dict[c2][r2]
97  except:
98  return None
99  return self.av_dist_map[idx1,idx2]
100  else:
101  if (r1,c1,r2,c2) not in self.stored_dists.keys():
102  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
103  selpart=sel.get_selected_particles()
104  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
105  if len(selpart_res_one)>1: return None
106  if len(selpart_res_one)==0: return None
107  selpart_res_one_1=selpart_res_one[0]
108  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
109  selpart=sel.get_selected_particles()
110  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
111  if len(selpart_res_one)>1: return None
112  if len(selpart_res_one)==0: return None
113  selpart_res_one_2=selpart_res_one[0]
114  d1=IMP.core.XYZ(selpart_res_one_1)
115  d2=IMP.core.XYZ(selpart_res_one_2)
116  dist=IMP.core.get_distance(d1,d2)
117  self.stored_dists[(r1,c1,r2,c2)]=dist
118  else:
119  dist=self.stored_dists[(r1,c1,r2,c2)]
120  return dist
121 
122 
123 
124 
125  def _internal_load_maps(self,maps_fn):
126  npzfile = np.load(maps_fn)
127  cname_array=npzfile['cname_array']
128  idx_array=npzfile['idx_array']
129  index_dict={}
130  for cname,idxs in zip(cname_array,idx_array):
131  tmp=list(idxs)
132  if -1 in tmp:
133  index_dict[cname]=tmp[0:tmp.index(-1)]
134  else:
135  index_dict[cname]=tmp
136  av_dist_map = npzfile['av_dist_map']
137  contact_map = npzfile['contact_map']
138  return index_dict,av_dist_map,contact_map
139 
140  def load_sequence_from_fasta_file(self,fasta_file,id_in_fasta_file,protein_name):
141  """ read sequence. structures are displayed in the same order as sequences are read.
142  fasta_file: file to read
143  id_in_fasta_file: id of desired sequence
144  protein_name: identifier for this sequence (use same name when handling coordinates)
145  can provide the fasta name (for retrieval) and the protein name (for storage) """
146 
147  record_dict = IMP.pmi.topology.Sequences(fasta_file)
148  if id_in_fasta_file is None:
149  id_in_fasta_file = name
150  if id_in_fasta_file not in record_dict:
151  raise KeyError("id %s not found in fasta file" % id_in_fasta_file)
152  length = len(record_dict[id_in_fasta_file])
153 
154  self.sequence_dict[protein_name] = str(record_dict[id_in_fasta_file])
155 
156  def load_pdb_coordinates(self,pdbfile,chain_to_name_map):
157  """ read coordinates from a pdb file. also appends to distance maps
158  @param pdbfile file for reading coords
159  @param chain_to_name_map correspond chain ID with protein name (will ONLY read these chains)
160  Key: PDB chain ID. Value: Protein name (set in sequence reading)
161  \note This function returns an error if the sequence for each chain has NOT been read
162  """
163  mh = IMP.atom.read_pdb(pdbfile,self.mdl,IMP.atom.CAlphaPDBSelector())
164  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
165  coords = np.ones((total_len,3)) * 1e5 #default to coords "very far away"
166  prev_stop = 0
167  for cid in chain_to_name_map:
168  cname = chain_to_name_map[cid]
169  if cname not in self.sequence_dict:
170  print("ERROR: chain",cname,'has not been read or has a naming mismatch')
171  return
172  if self._first:
173  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
174  sel = IMP.atom.Selection(mh,chain_id=cid)
175  for p in sel.get_selected_particles():
177  coords[rnum+prev_stop-1,:] = list(IMP.core.XYZ(p).get_coordinates())
178  prev_stop+=len(self.sequence_dict[cname])
179  dists = cdist(coords, coords)
180  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
181  if self._first:
182  self.dist_maps= [dists]
183  self.av_dist_map = dists
184  self.contact_freqs = binary_dists
185  self._first=False
186  else:
187  self.dist_maps.append(dists)
188  self.av_dist_map += dists
189  self.contact_freqs += binary_dists
190  IMP.atom.destroy(mh)
191  del mh
192  self.num_pdbs+=1
193 
194  def load_rmf_coordinates(self,rmf_name,rmf_frame_index, chain_names, nomap=False):
195  """ read coordinates from a rmf file. It needs IMP to run.
196  rmf has been created using IMP.pmi conventions. It gets the
197  highest resolution automatically. Also appends to distance maps
198  @param rmf_name file for reading coords
199  @param rmf_frame_index frame index from the rmf
200  @param nomap Default False, if True it will not calculate the contact map
201  """
202  (particles_resolution_one, prots)=self._get_rmf_structure(rmf_name,rmf_frame_index)
203 
204  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
205  print(self.sequence_dict,total_len)
206 
207  coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
208  prev_stop=0
209  sorted_particles=IMP.pmi.tools.sort_by_residues(particles_resolution_one)
210 
211  self.prots=prots
212  self.particles_resolution_one=particles_resolution_one
213 
214  if nomap:
215  return
216 
217  for cname in chain_names:
218  print(cname)
219  if self._first:
220  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
221  rindexes=range(1,len(self.sequence_dict[cname])+1)
222  for rnum in rindexes:
223  sel=IMP.atom.Selection(prots,molecule=cname,residue_index=rnum)
224  selpart=sel.get_selected_particles()
225  selpart_res_one=list(set(particles_resolution_one) & set(selpart))
226  if len(selpart_res_one)>1: continue
227  if len(selpart_res_one)==0: continue
228  selpart_res_one=selpart_res_one[0]
229  try:
230  coords[rnum+prev_stop-1,:]=IMP.core.XYZ(selpart_res_one).get_coordinates()
231  except IndexError:
232  print("Error: exceed max size",prev_stop,total_len,cname,rnum)
233  exit()
234  prev_stop+=len(self.sequence_dict[cname])
235  dists = cdist(coords, coords)
236  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
237  if self._first:
238  self.dist_maps= [dists]
239  self.av_dist_map = dists
240  self.contact_freqs = binary_dists
241  self._first=False
242  else:
243  self.dist_maps.append(dists)
244  self.av_dist_map += dists
245  self.contact_freqs += binary_dists
246  self.num_rmfs+=1
247 
248 
249  def _get_rmf_structure(self,rmf_name,rmf_frame_index):
250  rh= RMF.open_rmf_file_read_only(rmf_name)
251  prots=IMP.rmf.create_hierarchies(rh, self.mdl)
252  IMP.rmf.load_frame(rh, rmf_frame_index)
253  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
254  del rh
255 
257 
258  protein_names=particle_dict.keys()
259  particles_resolution_one=[]
260  for k in particle_dict:
261  particles_resolution_one+=(particle_dict[k])
262 
263  return particles_resolution_one, prots
264 
265 
266  def save_maps(self,maps_fn):
267  maxlen=max(len(self.index_dict[key]) for key in self.index_dict)
268  cnames=[]
269  idxs=[]
270  for cname,idx in self.index_dict.iteritems():
271  cnames.append(cname)
272  idxs.append(idx+[-1]*(maxlen-len(idx)))
273  idx_array=np.array(idxs)
274  cname_array=np.array(cnames)
275  np.savez(maps_fn,
276  cname_array=cname_array,
277  idx_array=idx_array,
278  av_dist_map=self.av_dist_map,
279  contact_map=self.contact_freqs)
280 
281  def load_maps(self,maps_fn):
282  self.index_dict,self.av_dist_map,self.contact_freqs=self._internal_load_maps(maps_fn)
283 
284  def load_crosslinks(self,CrossLinkDataBase,distance_field=None):
285  """ read crosslinks from a CSV file.
286  provide a CrossLinkDataBaseKeywordsConverter to explain the columns
287  @distance_field is the optional keyword for the distance to be read form the file.
288  This can skip the rmf reading to calculate the distance of cross-links if
289  already provided in the csv file."""
290  if type(CrossLinkDataBase) is not IMP.pmi.io.crosslink.CrossLinkDataBase:
291  raise TypeError("Crosslink database must be a IMP.pmi.io.CrossLinkDataBase type")
292  self.cross_link_db=CrossLinkDataBase
293  if distance_field is not None:
294  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
295  zeros = np.zeros((total_len,3))
296  self.av_dist_map = cdist(zeros,zeros)
297  for xl in self.cross_link_db:
298  c1=xl[self.cross_link_db.protein1_key]
299  c2=xl[self.cross_link_db.protein2_key]
300  r1=xl[self.cross_link_db.residue1_key]
301  r2=xl[self.cross_link_db.residue2_key]
302  try:
303  self.stored_dists[(r1,c1,r2,c2)]=float(xl[distance_field])
304  self.stored_dists[(r2,c2,r1,c1)]=float(xl[distance_field])
305  except ValueError:
306  self.stored_dists[(r1,c1,r2,c2)]=10e6
307  self.stored_dists[(r2,c2,r1,c1)]=10e6
308  else:
309  for xl in self.cross_link_db:
310  c1=xl[self.cross_link_db.protein1_key]
311  c2=xl[self.cross_link_db.protein2_key]
312  r1=xl[self.cross_link_db.residue1_key]
313  r2=xl[self.cross_link_db.residue2_key]
314  self.stored_dists[(r1,c1,r2,c2)]=self._get_distance(r1,c1,r2,c2)
315  self.stored_dists[(r2,c2,r1,c1)]=self._get_distance(r2,c2,r1,c1)
316 
317  def set_residue_pairs_to_display(self,residue_type_pair):
318  """ select the atom names of residue pairs to plot on the contact map
319  list of residues types must be single letter code
320  e.g. residue_type_pair=("K","K")
321  """
322  rtp=sorted(residue_type_pair)
323  for prot1 in self.sequence_dict:
324  seq1=self.sequence_dict[prot1]
325  for nres1,res1 in enumerate(seq1):
326  for prot2 in self.sequence_dict:
327  seq2=self.sequence_dict[prot2]
328  for nres2,res2 in enumerate(seq2):
329  if sorted((res1,res2))==rtp:
330  self.residue_pair_list.append((nres1+1,prot1,nres2+1,prot2))
331 
332  def setup_contact_map(self):
333  """ loop through each distance map and get frequency of contacts
334  """
335  if self.num_pdbs!=0 and self.num_rmfs==0:
336  self.av_dist_map = 1.0/self.num_pdbs * self.av_dist_map
337  self.contact_freqs = 1.0/self.num_pdbs * self.contact_freqs
338  if self.num_pdbs==0 and self.num_rmfs!=0:
339  self.av_dist_map = 1.0/self.num_rmfs * self.av_dist_map
340  self.contact_freqs = 1.0/self.num_rmfs * self.contact_freqs
341 
342  def setup_difference_map(self,maps_fn1,maps_fn2,thresh):
343  idx1,av1,contact1=self._internal_load_maps(maps_fn1)
344  idx2,av2,contact2=self._internal_load_maps(maps_fn2)
345  if idx1!=idx2:
346  print("UH OH: index dictionaries do not match!")
347  exit()
348  self.index_dict=idx1
349  self.av_dist_map=av1 # should we store both somehow? only needed for XL
350 
351  def logic(c1,c2):
352  if c1==0 and c2==0: # white
353  return 0
354  elif c1>thresh and c2<thresh: # red
355  return 1
356  elif c1<thresh and c2>thresh: # blue
357  return 2
358  else: # green
359  return 3
360  f = np.vectorize(logic,otypes=[np.int])
361  print('computing contact map')
362  self.contact_freqs = f(contact1,contact2)
363  print('done')
364 
365 
366 
367  def spring_layout(self,ax,plt,data, annotations, iterations = 100, k=1):
368 
369  import networkx as nx
370  import numpy
371  import scipy.spatial
372 
373 
374  #import matplotlib.pyplot as plt
375 
376  """
377  Author: G. Bouvier, Pasteur Institute, Paris
378  Website: http://bloggb.fr/2015/10/19/spring_layout_for_annotating_plot_in_matplotlib.html
379  - data: coordinates of your points [(x1,y1), (x2,y2), ..., (xn,yn)]
380  - annotations: text for your annotation ['str_1', 'str_2', ..., 'str_n']
381  - iterations: number of iterations for spring layout
382  - k: optimal distance between nodes
383  """
384  G = nx.Graph()
385  init_pos = {} # initial position
386  x, y = [e[0] for e in data], [e[1] for e in data]
387  xmin, xmax = min(x), max(x)
388  ymin, ymax = min(y), max(y)
389 
390  m=IMP.Model()
391  particles={}
392  particles_fixed={}
393  mvs = []
394 
395  rs = IMP.RestraintSet(m, 'distance')
396 
397  for i,xy in enumerate(data):
398  p=IMP.Particle(m)
399  G.add_node(i)
400  init_pos[i] = xy
402  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
403  IMP.core.XYZR(p).set_radius(100)
404  IMP.core.XYZ(p).set_coordinates_are_optimized(True)
405  particles[i]=p
406  Xfloatkey = IMP.core.XYZ.get_xyz_keys()[0]
407  Yfloatkey = IMP.core.XYZ.get_xyz_keys()[1]
408  mvs.append(IMP.core.NormalMover([p], [Xfloatkey], 5))
409  mvs.append(IMP.core.NormalMover([p], [Yfloatkey], 5))
410  p=IMP.Particle(m)
412  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
413  IMP.core.XYZR(p).set_radius(100)
414  particles_fixed[i]=p
415  ts1 = IMP.core.HarmonicUpperBound(200, 0.001)
416  rs.add_restraint(
418  particles[i],
419  particles_fixed[i]))
420 
421  ssps = IMP.core.SoftSpherePairScore(1.0)
423 
424  ps = particles.values()+particles_fixed.values()
425 
426  lsa.add(IMP.get_indexes(ps))
427 
429  cpc = IMP.container.ClosePairContainer(lsa, 0.0, rbcpf, 10.0)
430  evr = IMP.container.PairsRestraint(ssps, cpc)
431  rs.add_restraint(evr)
432 
433  '''
434  #G.add_node(text)
435  #G.add_edge(xy, text)
436  #init_pos[text] = xy
437 
438  delTri = scipy.spatial.Delaunay(data)
439  #plt.scatter([e[0] for e in data], [e[1] for e in data])
440  # create a set for edges that are indexes of the points
441  edges = set()
442  # for each Delaunay triangle
443  for n in xrange(delTri.nsimplex):
444  # for each edge of the triangle
445  # sort the vertices
446  # (sorting avoids duplicated edges being added to the set)
447  # and add to the edges set
448  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,1]])
449  edges.add((edge[0], edge[1]))
450  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,2]])
451  edges.add((edge[0], edge[1]))
452  edge = sorted([delTri.vertices[n,1], delTri.vertices[n,2]])
453  edges.add((edge[0], edge[1]))
454 
455 
456  for e in edges:
457  G.add_edge(e[0],e[1])
458 
459  d1=IMP.core.XYZ(particles[e[0]])
460  d2=IMP.core.XYZ(particles[e[1]])
461  dist=IMP.core.get_distance(d1,d2)
462  ts1 = IMP.core.Harmonic(dist+150, 0.001)
463  rs.add_restraint(
464  IMP.core.DistanceRestraint(m, ts1,
465  particles[e[0]],
466  particles[e[1]]))
467  '''
468 
469  smv = IMP.core.SerialMover(mvs)
470 
471  mc = IMP.core.MonteCarlo(m)
472  mc.set_scoring_function(rs)
473  mc.set_return_best(False)
474  mc.set_kt(1.0)
475  mc.add_mover(smv)
476 
477 
478  '''
479  pos = nx.spring_layout(G ,pos=init_pos)
480  x_list=[]
481  y_list=[]
482  for i,xy in enumerate(data):
483  xynew = pos[i] * [xmax-xmin, ymax-ymin] + [xmin, ymin]
484  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
485  x_list.append(xynew[0])
486  y_list.append(xynew[1])
487  #plt.annotate(name, xy, size=5, xycoords='data', xytext=xytext, textcoords='data', bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),\
488  # arrowprops=dict(arrowstyle="-", connectionstyle="arc3", color='gray'))
489 
490  points=ax.scatter(x_list,y_list,alpha=1)
491  #pos*=[numpy.ptp([e[0] for e in data]), numpy.ptp([e[1] for e in data])]
492  #plt.show()
493  return points
494  '''
495 
496  for i in range(10):
497  mc.optimize(1000*len(particles.keys())*2)
498  print(rs.evaluate(False))
499  x_list=[]
500  y_list=[]
501  for i,xy in enumerate(data):
502  p=particles[i]
503  coord=IMP.core.XYZ(p).get_coordinates()
504  xynew = (coord[0],coord[1])
505  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
506  x_list.append(xynew[0])
507  y_list.append(xynew[1])
508  points=ax.scatter(x_list,y_list,alpha=1,facecolors='none', edgecolors='k')
509  return points
510 
511 
512 
513  def show_mpld3(self,fig,ax,points,xl_list,xl_labels):
514  import mpld3
515  from mpld3 import plugins
516  import pandas as pd
517 
518  # Define some CSS to control our custom labels
519  css = """
520  table
521  {
522  border-collapse: collapse;
523  }
524  th
525  {
526  color: #000000;
527  background-color: #ffffff;
528  }
529  td
530  {
531  background-color: #cccccc;
532  }
533  table, th, td
534  {
535  font-family:Arial, Helvetica, sans-serif;
536  border: 1px solid black;
537  text-align: right;
538  font-size: 10px;
539  }
540  """
541  df = pd.DataFrame(index=xl_labels)
542 
543  sorted_keys=sorted(xl_list[0].keys())
544 
545  for k in sorted_keys:
546  df[k] = np.array([xl[k] for xl in xl_list])
547 
548  labels = []
549  for i in range(len(xl_labels)):
550  label = df.ix[[i], :].T
551  # .to_html() is unicode; so make leading 'u' go away with str()
552  labels.append(str(label.to_html()))
553 
554  tooltip = plugins.PointHTMLTooltip(points, labels,
555  voffset=10, hoffset=10, css=css)
556  plugins.connect(fig, tooltip)
557  mpld3.save_html(fig,"output.html")
558 
559 
560  def get_residue_contacts(self,prot_listx,prot_listy):
561  for px in prot_listx:
562  for py in prot_listy:
563  indexes_x = self.index_dict[px]
564  minx = min(indexes_x)
565  maxx = max(indexes_x)
566  indexes_y = self.index_dict[py]
567  miny = min(indexes_y)
568  maxy = max(indexes_y)
569  array = self.contact_freqs[minx:maxx,miny:maxy]
570  (xresidues,yresidues)=np.where(array>0)
571  for n,xr in enumerate(xresidues):
572  print(xr,yresidues[n],px,py,array[xr,yresidues[n]])
573 
574 
575  def plot_table(self, prot_listx=None,
576  prot_listy=None,
577  no_dist_info=False,
578  confidence_info=False,
579  filter=None,
580  display_residue_pairs=False,
581  contactmap=False,
582  filename=None,
583  confidence_classes=None,
584  alphablend=0.1,
585  scale_symbol_size=1.0,
586  gap_between_components=0,
587  dictionary_of_gaps={},
588  colormap=cm.binary,
589  crosslink_threshold=None,
590  colornorm=None,
591  cbar_labels=None,
592  labels=False,
593  show_mpld3=False,
594  color_crosslinks_by_distance=True):
595  """ plot the xlink table with optional contact map.
596  prot_listx: list of protein names on the x-axis
597  prot_listy: list of protein names on the y-axis
598  no_dist_info: plot only the cross-links as grey spots
599  confidence_info:
600  filter: list of tuples to filter on. each one contains:
601  keyword in the database to be filtered on
602  relationship ">","==","<"
603  a value
604  example ("ID_Score",">",40)
605  display_residue_pairs: display all pairs defined in self.residue_pair_list
606  contactmap: display the contact map
607  filename: save to file (adds .pdf extension)
608  confidence_classes:
609  alphablend:
610  scale_symbol_size: rescale the symbol for the crosslink
611  gap_between_components: the numbeber of residues to leave blannk between each component
612  dictionary_of_gaps: add extra space after the given protein. dictionary_of_gaps={prot_name:extra_gap}
613  """
614  # prepare figure
615 
616 
617  fig = plt.figure(figsize=(100, 100))
618  ax = fig.add_subplot(111)
619  ax.set_xticks([])
620  ax.set_yticks([])
621 
622  if cbar_labels is not None:
623  if len(cbar_labels)!=4:
624  print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
625  exit()
626  # set the list of proteins on the x axis
627  if prot_listx is None:
628  prot_listx = self.sequence_dict.keys()
629  prot_listx.sort()
630  nresx = gap_between_components + \
631  sum([len(self.sequence_dict[name])
632  + gap_between_components for name in prot_listx]) + \
633  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
634 
635  # set the list of proteins on the y axis
636  if prot_listy is None:
637  prot_listy = self.sequence_dict.keys()
638  prot_listy.sort()
639  nresy = gap_between_components + \
640  sum([len(self.sequence_dict[name])
641  + gap_between_components for name in prot_listy]) + \
642  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
643 
644  # this is the residue offset for each protein
645  resoffsetx = {}
646  resendx = {}
647  res = gap_between_components
648  for prot in prot_listx:
649  resoffsetx[prot] = res
650  res += len(self.sequence_dict[prot])
651  resendx[prot] = res
652  res += gap_between_components
653  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
654 
655  resoffsety = {}
656  resendy = {}
657  res = gap_between_components
658  for prot in prot_listy:
659  resoffsety[prot] = res
660  res += len(self.sequence_dict[prot])
661  resendy[prot] = res
662  res += gap_between_components
663  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
664 
665  resoffsetdiagonal = {}
666  res = gap_between_components
667  for prot in IMP.pmi.io.utilities.OrderedSet(prot_listx + prot_listy):
668  resoffsetdiagonal[prot] = res
669  res += len(self.sequence_dict[prot])
670  res += gap_between_components
671  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
672 
673  # plot protein boundaries
674  xticks = []
675  xlabels = []
676  for n, prot in enumerate(prot_listx):
677  res = resoffsetx[prot]
678  end = resendx[prot]
679  for proty in prot_listy:
680  resy = resoffsety[proty]
681  endy = resendy[proty]
682  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
683  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
684  xticks.append((float(res) + float(end)) / 2)
685  xlabels.append(prot)
686 
687  yticks = []
688  ylabels = []
689  for n, prot in enumerate(prot_listy):
690  res = resoffsety[prot]
691  end = resendy[prot]
692  for protx in prot_listx:
693  resx = resoffsetx[protx]
694  endx = resendx[protx]
695  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
696  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
697  yticks.append((float(res) + float(end)) / 2)
698  ylabels.append(prot)
699 
700  # plot the contact map
701  if contactmap:
702  tmp_array = np.zeros((nresx, nresy))
703  for px in prot_listx:
704  for py in prot_listy:
705  resx = resoffsetx[px]
706  lengx = resendx[px] - 1
707  resy = resoffsety[py]
708  lengy = resendy[py] - 1
709  indexes_x = self.index_dict[px]
710  minx = min(indexes_x)
711  maxx = max(indexes_x)
712  indexes_y = self.index_dict[py]
713  miny = min(indexes_y)
714  maxy = max(indexes_y)
715  tmp_array[resx:lengx,resy:lengy] = self.contact_freqs[minx:maxx,miny:maxy]
716 
717  cax = ax.imshow(tmp_array,
718  cmap=colormap,
719  norm=colornorm,
720  origin='lower',
721  alpha=0.6,
722  interpolation='nearest')
723 
724  ax.set_xticks(xticks)
725  ax.set_xticklabels(xlabels, rotation=90)
726  ax.set_yticks(yticks)
727  ax.set_yticklabels(ylabels)
728  plt.setp(ax.get_xticklabels(), fontsize=6)
729  plt.setp(ax.get_yticklabels(), fontsize=6)
730 
731  # set the crosslinks
732  already_added_xls = []
733  xl_coordinates_tuple_list = []
734  xl_labels = []
735  x_list=[]
736  y_list=[]
737  color_list=[]
738  xl_list=[]
739 
740  markersize = 5 * scale_symbol_size
741  if self.cross_link_db:
742  for xl in self.cross_link_db:
743 
744  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
745  label=xl[self.cross_link_db.unique_sub_id_key]
746  if color_crosslinks_by_distance:
747 
748  try:
749  mdist=self._get_distance(r1,c1,r2,c2)
750  if mdist is None: continue
751  color = self._colormap_distance(mdist,threshold=crosslink_threshold)
752  except KeyError:
753  color="gray"
754 
755  else:
756 
757  try:
758  ps=self._get_percentage_satisfaction(r1,c1,r2,c2)
759  if ps is None: continue
760  color = self._colormap_satisfaction(ps,threshold=0.2,tolerance=0.1)
761  except KeyError:
762  color="gray"
763 
764  try:
765  pos1 = r1 + resoffsetx[c1]
766  except:
767  continue
768  try:
769  pos2 = r2 + resoffsety[c2]
770  except:
771  continue
772 
773 
774  # everything below is used for plotting the diagonal
775  # when you have a rectangolar plots
776  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
777  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
778  if confidence_info:
779  if confidence == '0.01':
780  markersize = 14 * scale_symbol_size
781  elif confidence == '0.05':
782  markersize = 9 * scale_symbol_size
783  elif confidence == '0.1':
784  markersize = 6 * scale_symbol_size
785  else:
786  markersize = 15 * scale_symbol_size
787  else:
788  markersize = 5 * scale_symbol_size
789  '''
790  ax.plot([pos1],
791  [pos2],
792  'o',
793  c=color,
794  alpha=alphablend,
795  markersize=markersize)
796 
797  ax.plot([pos2],
798  [pos1],
799  'o',
800  c=color,
801  alpha=alphablend,
802  markersize=markersize)
803  '''
804  x_list.append(pos1)
805  x_list.append(pos2)
806  y_list.append(pos2)
807  y_list.append(pos1)
808  color_list.append(color)
809  color_list.append(color)
810  xl["Distance"]=mdist
811  xl_list.append(xl)
812  xl_list.append(xl)
813 
814  xl_labels.append(label)
815  xl_coordinates_tuple_list.append((float(pos1),float(pos2)))
816  xl_labels.append(label+"*")
817  xl_coordinates_tuple_list.append((float(pos2),float(pos1)))
818 
819  points=ax.scatter(x_list,y_list,s=markersize,c=color_list,alpha=alphablend)
820 
821  # plot requested residue pairs
822  if display_residue_pairs:
823  for rp in self.residue_pair_list:
824  r1=rp[0]
825  c1=rp[1]
826  r2=rp[2]
827  c2=rp[3]
828 
829  try:
830  dist=self._get_distance(r1,c1,r2,c2)
831  except:
832  continue
833 
834  if dist<=40.0:
835  print(rp)
836  try:
837  pos1 = r1 + resoffsetx[c1]
838  except:
839  continue
840  try:
841  pos2 = r2 + resoffsety[c2]
842  except:
843  continue
844 
845  ax.plot([pos1],
846  [pos2],
847  '+',
848  c="blue",
849  alpha=0.1,
850  markersize=markersize)
851 
852  # display and write to file
853  fig.set_size_inches(0.002 * nresx, 0.002 * nresy)
854  [i.set_linewidth(2.0) for i in ax.spines.itervalues()]
855  if cbar_labels is not None:
856  cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
857  cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
858 
859  # set the labels using spring layout
860  if labels:
861  points=self.spring_layout(ax,plt,xl_coordinates_tuple_list, xl_labels)
862 
863  if filename:
864  #plt.show()
865  plt.savefig(filename, dpi=300,transparent="False")
866 
867 
868  if show_mpld3:
869  self.show_mpld3(fig,ax,points,xl_list,xl_labels)
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
Utility classes and functions for IO.
Definition: utilities.py:1
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
Upper bound harmonic function (non-zero when feature > mean)
class to read, analyze, and plot xlink data on contact maps Canonical way to read the data: 1) load s...
Definition: xltable.py:28
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def plot_table
plot the xlink table with optional contact map.
Definition: xltable.py:575
def load_rmf_coordinates
read coordinates from a rmf file.
Definition: xltable.py:194
Return all close unordered pairs of particles taken from the SingletonContainer.
def load_pdb_coordinates
read coordinates from a pdb file.
Definition: xltable.py:156
Distance restraint between two particles.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Object used to hold a set of restraints.
Definition: RestraintSet.h:36
def setup_contact_map
loop through each distance map and get frequency of contacts
Definition: xltable.py:332
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def load_sequence_from_fasta_file
read sequence.
Definition: xltable.py:140
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def set_residue_pairs_to_display
select the atom names of residue pairs to plot on the contact map list of residues types must be sing...
Definition: xltable.py:317
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
def load_crosslinks
read crosslinks from a CSV file.
Definition: xltable.py:284
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:9803
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Select all CA ATOM records.
Definition: pdb.h:77
static const FloatKeys & get_xyz_keys()
Get a vector containing the keys for x,y,z.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
Applies a list of movers one at a time.
Definition: SerialMover.h:23
Applies a PairScore to each Pair in a list.
Perform more efficient close pair finding when rigid bodies are involved.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27