IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
xltable.py
1 """@namespace IMP.pmi.io.xltable
2  Tools to plot a contact map overlaid with cross-links.
3 """
4 
5 from __future__ import print_function
6 from math import sqrt
7 from scipy.spatial.distance import cdist
8 import numpy as np
9 import matplotlib as mpl
10 mpl.use('Agg')
11 import matplotlib.cm as cm
12 import matplotlib.pyplot as plt
13 
14 from collections import defaultdict
15 import pickle
16 import IMP
17 import IMP.atom
18 import IMP.rmf
19 import RMF
20 import IMP.pmi
21 import IMP.pmi.io
24 import IMP.pmi.topology
25 import IMP.pmi.analysis
26 import operator
27 
28 class XLTable():
29  """ class to read, analyze, and plot xlink data on contact maps
30  Canonical way to read the data:
31  1) load sequences and name them
32  2) load coordinates for those sequences from PDB file
33  3) add crosslinks
34  4) create contact map
35  5) plot
36  """
37 
38  def __init__(self,contact_threshold):
39  self.sequence_dict={}
40  self.cross_link_db = None
41  self.residue_pair_list = [] # list of special residue pairs to display
42  self.distance_maps = [] # distance map for each copy of the complex
43  self.contact_freqs = None
44  self.num_pdbs = 0
45  self.num_rmfs = 0
46  self.index_dict = defaultdict(list) # location in the dmap of each residue
47  self.contact_threshold = contact_threshold
48  # internal things
49  self._first = True
50  self.index_dict={}
51  self.stored_dists={}
52  self.mdl = IMP.Model()
53 
54  def _colormap_distance(self, dist, threshold=35, tolerance=0):
55  if dist < threshold - tolerance:
56  return "Green"
57  elif dist >= threshold + tolerance:
58  return "Orange"
59  else:
60  return "Red"
61 
62  def _colormap_satisfaction(self, sat, threshold=0.5, tolerance=0.1):
63  if sat >= threshold + tolerance:
64  print(sat, "green")
65  return "Green"
66  elif sat < threshold + tolerance and sat >= threshold - tolerance :
67  print(sat, "orange")
68  return "Orange"
69  else:
70  print(sat, "orange")
71  return "Orange"
72 
73  def _get_percentage_satisfaction(self,r1,c1,r2,c2,threshold=35):
74  try:
75  idx1=self.index_dict[c1][r1]
76  except:
77  return None
78  try:
79  idx2=self.index_dict[c2][r2]
80  except:
81  return None
82  nsatisfied=0
83  for dists in self.dist_maps:
84  dist=dists[idx1,idx2]
85  if dist<threshold: nsatisfied+=1
86  return float(nsatisfied)/len(self.dist_maps)
87 
88  def _get_distance(self,r1,c1,r2,c2):
89  if self.index_dict is not None:
90  try:
91  idx1=self.index_dict[c1][r1]
92  except:
93  return None
94  try:
95  idx2=self.index_dict[c2][r2]
96  except:
97  return None
98  return self.av_dist_map[idx1,idx2]
99  else:
100  if (r1,c1,r2,c2) not in self.stored_dists.keys():
101  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
102  selpart=sel.get_selected_particles()
103  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
104  if len(selpart_res_one)>1: return None
105  if len(selpart_res_one)==0: return None
106  selpart_res_one_1=selpart_res_one[0]
107  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
108  selpart=sel.get_selected_particles()
109  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
110  if len(selpart_res_one)>1: return None
111  if len(selpart_res_one)==0: return None
112  selpart_res_one_2=selpart_res_one[0]
113  d1=IMP.core.XYZ(selpart_res_one_1)
114  d2=IMP.core.XYZ(selpart_res_one_2)
115  dist=IMP.core.get_distance(d1,d2)
116  self.stored_dists[(r1,c1,r2,c2)]=dist
117  else:
118  dist=self.stored_dists[(r1,c1,r2,c2)]
119  return dist
120 
121 
122 
123 
124  def _internal_load_maps(self,maps_fn):
125  npzfile = np.load(maps_fn)
126  cname_array=npzfile['cname_array']
127  idx_array=npzfile['idx_array']
128  index_dict={}
129  for cname,idxs in zip(cname_array,idx_array):
130  tmp=list(idxs)
131  if -1 in tmp:
132  index_dict[cname]=tmp[0:tmp.index(-1)]
133  else:
134  index_dict[cname]=tmp
135  av_dist_map = npzfile['av_dist_map']
136  contact_map = npzfile['contact_map']
137  return index_dict,av_dist_map,contact_map
138 
139  def load_sequence_from_fasta_file(self,fasta_file,id_in_fasta_file,protein_name):
140  """ read sequence. structures are displayed in the same order as sequences are read.
141  fasta_file: file to read
142  id_in_fasta_file: id of desired sequence
143  protein_name: identifier for this sequence (use same name when handling coordinates)
144  can provide the fasta name (for retrieval) and the protein name (for storage) """
145 
146  record_dict = IMP.pmi.topology.Sequences(fasta_file)
147  if id_in_fasta_file is None:
148  id_in_fasta_file = name
149  if id_in_fasta_file not in record_dict:
150  raise KeyError("id %s not found in fasta file" % id_in_fasta_file)
151  length = len(record_dict[id_in_fasta_file])
152 
153  self.sequence_dict[protein_name] = str(record_dict[id_in_fasta_file])
154 
155  def load_pdb_coordinates(self,pdbfile,chain_to_name_map):
156  """ read coordinates from a pdb file. also appends to distance maps
157  @param pdbfile file for reading coords
158  @param chain_to_name_map correspond chain ID with protein name (will ONLY read these chains)
159  Key: PDB chain ID. Value: Protein name (set in sequence reading)
160  \note This function returns an error if the sequence for each chain has NOT been read
161  """
162  mh = IMP.atom.read_pdb(pdbfile,self.mdl,IMP.atom.CAlphaPDBSelector())
163  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
164  coords = np.ones((total_len,3)) * 1e5 #default to coords "very far away"
165  prev_stop = 0
166  for cid in chain_to_name_map:
167  cname = chain_to_name_map[cid]
168  if cname not in self.sequence_dict:
169  print("ERROR: chain",cname,'has not been read or has a naming mismatch')
170  return
171  if self._first:
172  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
173  sel = IMP.atom.Selection(mh,chain_id=cid)
174  for p in sel.get_selected_particles():
176  coords[rnum+prev_stop-1,:] = list(IMP.core.XYZ(p).get_coordinates())
177  prev_stop+=len(self.sequence_dict[cname])
178  dists = cdist(coords, coords)
179  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
180  if self._first:
181  self.dist_maps= [dists]
182  self.av_dist_map = dists
183  self.contact_freqs = binary_dists
184  self._first=False
185  else:
186  self.dist_maps.append(dists)
187  self.av_dist_map += dists
188  self.contact_freqs += binary_dists
189  IMP.atom.destroy(mh)
190  del mh
191  self.num_pdbs+=1
192 
193  def load_rmf_coordinates(self,rmf_name,rmf_frame_index, chain_names, nomap=False):
194  """ read coordinates from a rmf file. It needs IMP to run.
195  rmf has been created using IMP.pmi conventions. It gets the
196  highest resolution automatically. Also appends to distance maps
197  @param rmf_name file for reading coords
198  @param rmf_frame_index frame index from the rmf
199  @param nomap Default False, if True it will not calculate the contact map
200  """
201  (particles_resolution_one, prots)=self._get_rmf_structure(rmf_name,rmf_frame_index)
202 
203  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
204  print(self.sequence_dict,total_len)
205 
206  coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
207  prev_stop=0
208  sorted_particles=IMP.pmi.tools.sort_by_residues(particles_resolution_one)
209 
210  self.prots=prots
211  self.particles_resolution_one=particles_resolution_one
212 
213  if nomap:
214  return
215 
216  for cname in chain_names:
217  print(cname)
218  if self._first:
219  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
220  rindexes=range(1,len(self.sequence_dict[cname])+1)
221  for rnum in rindexes:
222  sel=IMP.atom.Selection(prots,molecule=cname,residue_index=rnum)
223  selpart=sel.get_selected_particles()
224  selpart_res_one=list(set(particles_resolution_one) & set(selpart))
225  if len(selpart_res_one)>1: continue
226  if len(selpart_res_one)==0: continue
227  selpart_res_one=selpart_res_one[0]
228  try:
229  coords[rnum+prev_stop-1,:]=IMP.core.XYZ(selpart_res_one).get_coordinates()
230  except IndexError:
231  print("Error: exceed max size",prev_stop,total_len,cname,rnum)
232  exit()
233  prev_stop+=len(self.sequence_dict[cname])
234  dists = cdist(coords, coords)
235  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
236  if self._first:
237  self.dist_maps= [dists]
238  self.av_dist_map = dists
239  self.contact_freqs = binary_dists
240  self._first=False
241  else:
242  self.dist_maps.append(dists)
243  self.av_dist_map += dists
244  self.contact_freqs += binary_dists
245  self.num_rmfs+=1
246 
247 
248  def _get_rmf_structure(self,rmf_name,rmf_frame_index):
249  rh= RMF.open_rmf_file_read_only(rmf_name)
250  prots=IMP.rmf.create_hierarchies(rh, self.mdl)
251  IMP.rmf.load_frame(rh, rmf_frame_index)
252  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
253  del rh
254 
256 
257  protein_names=particle_dict.keys()
258  particles_resolution_one=[]
259  for k in particle_dict:
260  particles_resolution_one+=(particle_dict[k])
261 
262  return particles_resolution_one, prots
263 
264 
265  def save_maps(self,maps_fn):
266  maxlen=max(len(self.index_dict[key]) for key in self.index_dict)
267  cnames=[]
268  idxs=[]
269  for cname,idx in self.index_dict.iteritems():
270  cnames.append(cname)
271  idxs.append(idx+[-1]*(maxlen-len(idx)))
272  idx_array=np.array(idxs)
273  cname_array=np.array(cnames)
274  np.savez(maps_fn,
275  cname_array=cname_array,
276  idx_array=idx_array,
277  av_dist_map=self.av_dist_map,
278  contact_map=self.contact_freqs)
279 
280  def load_maps(self,maps_fn):
281  self.index_dict,self.av_dist_map,self.contact_freqs=self._internal_load_maps(maps_fn)
282 
283  def load_crosslinks(self,CrossLinkDataBase,distance_field=None):
284  """ read crosslinks from a CSV file.
285  provide a CrossLinkDataBaseKeywordsConverter to explain the columns
286  @distance_field is the optional keyword for the distance to be read form the file.
287  This can skip the rmf reading to calculate the distance of cross-links if
288  already provided in the csv file."""
289  if type(CrossLinkDataBase) is not IMP.pmi.io.crosslink.CrossLinkDataBase:
290  raise TypeError("Crosslink database must be a IMP.pmi.io.CrossLinkDataBase type")
291  self.cross_link_db=CrossLinkDataBase
292  if distance_field is not None:
293  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
294  zeros = np.zeros((total_len,3))
295  self.av_dist_map = cdist(zeros,zeros)
296  for xl in self.cross_link_db:
297  c1=xl[self.cross_link_db.protein1_key]
298  c2=xl[self.cross_link_db.protein2_key]
299  r1=xl[self.cross_link_db.residue1_key]
300  r2=xl[self.cross_link_db.residue2_key]
301  try:
302  self.stored_dists[(r1,c1,r2,c2)]=float(xl[distance_field])
303  self.stored_dists[(r2,c2,r1,c1)]=float(xl[distance_field])
304  except ValueError:
305  self.stored_dists[(r1,c1,r2,c2)]=10e6
306  self.stored_dists[(r2,c2,r1,c1)]=10e6
307  else:
308  for xl in self.cross_link_db:
309  c1=xl[self.cross_link_db.protein1_key]
310  c2=xl[self.cross_link_db.protein2_key]
311  r1=xl[self.cross_link_db.residue1_key]
312  r2=xl[self.cross_link_db.residue2_key]
313  self.stored_dists[(r1,c1,r2,c2)]=self._get_distance(r1,c1,r2,c2)
314  self.stored_dists[(r2,c2,r1,c1)]=self._get_distance(r2,c2,r1,c1)
315 
316  def set_residue_pairs_to_display(self,residue_type_pair):
317  """ select the atom names of residue pairs to plot on the contact map
318  list of residues types must be single letter code
319  e.g. residue_type_pair=("K","K")
320  """
321  rtp=sorted(residue_type_pair)
322  for prot1 in self.sequence_dict:
323  seq1=self.sequence_dict[prot1]
324  for nres1,res1 in enumerate(seq1):
325  for prot2 in self.sequence_dict:
326  seq2=self.sequence_dict[prot2]
327  for nres2,res2 in enumerate(seq2):
328  if sorted((res1,res2))==rtp:
329  self.residue_pair_list.append((nres1+1,prot1,nres2+1,prot2))
330 
331  def setup_contact_map(self):
332  """ loop through each distance map and get frequency of contacts
333  """
334  if self.num_pdbs!=0 and self.num_rmfs==0:
335  self.av_dist_map = 1.0/self.num_pdbs * self.av_dist_map
336  self.contact_freqs = 1.0/self.num_pdbs * self.contact_freqs
337  if self.num_pdbs==0 and self.num_rmfs!=0:
338  self.av_dist_map = 1.0/self.num_rmfs * self.av_dist_map
339  self.contact_freqs = 1.0/self.num_rmfs * self.contact_freqs
340 
341  def setup_difference_map(self,maps_fn1,maps_fn2,thresh):
342  idx1,av1,contact1=self._internal_load_maps(maps_fn1)
343  idx2,av2,contact2=self._internal_load_maps(maps_fn2)
344  if idx1!=idx2:
345  print("UH OH: index dictionaries do not match!")
346  exit()
347  self.index_dict=idx1
348  self.av_dist_map=av1 # should we store both somehow? only needed for XL
349 
350  def logic(c1,c2):
351  if c1==0 and c2==0: # white
352  return 0
353  elif c1>thresh and c2<thresh: # red
354  return 1
355  elif c1<thresh and c2>thresh: # blue
356  return 2
357  else: # green
358  return 3
359  f = np.vectorize(logic,otypes=[np.int])
360  print('computing contact map')
361  self.contact_freqs = f(contact1,contact2)
362  print('done')
363 
364 
365 
366  def spring_layout(self,ax,plt,data, annotations, iterations = 100, k=1):
367 
368  import networkx as nx
369  import numpy
370  import scipy.spatial
371 
372 
373  #import matplotlib.pyplot as plt
374 
375  """
376  Author: G. Bouvier, Pasteur Institute, Paris
377  Website: http://bloggb.fr/2015/10/19/spring_layout_for_annotating_plot_in_matplotlib.html
378  - data: coordinates of your points [(x1,y1), (x2,y2), ..., (xn,yn)]
379  - annotations: text for your annotation ['str_1', 'str_2', ..., 'str_n']
380  - iterations: number of iterations for spring layout
381  - k: optimal distance between nodes
382  """
383  G = nx.Graph()
384  init_pos = {} # initial position
385  x, y = [e[0] for e in data], [e[1] for e in data]
386  xmin, xmax = min(x), max(x)
387  ymin, ymax = min(y), max(y)
388 
389  m=IMP.Model()
390  particles={}
391  particles_fixed={}
392  mvs = []
393 
394  rs = IMP.RestraintSet(m, 'distance')
395 
396  for i,xy in enumerate(data):
397  p=IMP.Particle(m)
398  G.add_node(i)
399  init_pos[i] = xy
401  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
402  IMP.core.XYZR(p).set_radius(100)
403  IMP.core.XYZ(p).set_coordinates_are_optimized(True)
404  particles[i]=p
405  Xfloatkey = IMP.core.XYZ.get_xyz_keys()[0]
406  Yfloatkey = IMP.core.XYZ.get_xyz_keys()[1]
407  mvs.append(IMP.core.NormalMover([p], [Xfloatkey], 5))
408  mvs.append(IMP.core.NormalMover([p], [Yfloatkey], 5))
409  p=IMP.Particle(m)
411  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
412  IMP.core.XYZR(p).set_radius(100)
413  particles_fixed[i]=p
414  ts1 = IMP.core.HarmonicUpperBound(200, 0.001)
415  rs.add_restraint(
417  particles[i],
418  particles_fixed[i]))
419 
420  ssps = IMP.core.SoftSpherePairScore(1.0)
422 
423  ps = particles.values()+particles_fixed.values()
424 
425  lsa.add(IMP.get_indexes(ps))
426 
428  cpc = IMP.container.ClosePairContainer(lsa, 0.0, rbcpf, 10.0)
429  evr = IMP.container.PairsRestraint(ssps, cpc)
430  rs.add_restraint(evr)
431 
432  '''
433  #G.add_node(text)
434  #G.add_edge(xy, text)
435  #init_pos[text] = xy
436 
437  delTri = scipy.spatial.Delaunay(data)
438  #plt.scatter([e[0] for e in data], [e[1] for e in data])
439  # create a set for edges that are indexes of the points
440  edges = set()
441  # for each Delaunay triangle
442  for n in xrange(delTri.nsimplex):
443  # for each edge of the triangle
444  # sort the vertices
445  # (sorting avoids duplicated edges being added to the set)
446  # and add to the edges set
447  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,1]])
448  edges.add((edge[0], edge[1]))
449  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,2]])
450  edges.add((edge[0], edge[1]))
451  edge = sorted([delTri.vertices[n,1], delTri.vertices[n,2]])
452  edges.add((edge[0], edge[1]))
453 
454 
455  for e in edges:
456  G.add_edge(e[0],e[1])
457 
458  d1=IMP.core.XYZ(particles[e[0]])
459  d2=IMP.core.XYZ(particles[e[1]])
460  dist=IMP.core.get_distance(d1,d2)
461  ts1 = IMP.core.Harmonic(dist+150, 0.001)
462  rs.add_restraint(
463  IMP.core.DistanceRestraint(m, ts1,
464  particles[e[0]],
465  particles[e[1]]))
466  '''
467 
468  smv = IMP.core.SerialMover(mvs)
469 
470  mc = IMP.core.MonteCarlo(m)
471  mc.set_scoring_function(rs)
472  mc.set_return_best(False)
473  mc.set_kt(1.0)
474  mc.add_mover(smv)
475 
476 
477  '''
478  pos = nx.spring_layout(G ,pos=init_pos)
479  x_list=[]
480  y_list=[]
481  for i,xy in enumerate(data):
482  xynew = pos[i] * [xmax-xmin, ymax-ymin] + [xmin, ymin]
483  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
484  x_list.append(xynew[0])
485  y_list.append(xynew[1])
486  #plt.annotate(name, xy, size=5, xycoords='data', xytext=xytext, textcoords='data', bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),\
487  # arrowprops=dict(arrowstyle="-", connectionstyle="arc3", color='gray'))
488 
489  points=ax.scatter(x_list,y_list,alpha=1)
490  #pos*=[numpy.ptp([e[0] for e in data]), numpy.ptp([e[1] for e in data])]
491  #plt.show()
492  return points
493  '''
494 
495  for i in range(10):
496  mc.optimize(1000*len(particles.keys())*2)
497  print(rs.evaluate(False))
498  x_list=[]
499  y_list=[]
500  for i,xy in enumerate(data):
501  p=particles[i]
502  coord=IMP.core.XYZ(p).get_coordinates()
503  xynew = (coord[0],coord[1])
504  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
505  x_list.append(xynew[0])
506  y_list.append(xynew[1])
507  points=ax.scatter(x_list,y_list,alpha=1,facecolors='none', edgecolors='k')
508  return points
509 
510 
511 
512  def show_mpld3(self,fig,ax,points,xl_list,xl_labels):
513  import mpld3
514  from mpld3 import plugins
515  import pandas as pd
516 
517  # Define some CSS to control our custom labels
518  css = """
519  table
520  {
521  border-collapse: collapse;
522  }
523  th
524  {
525  color: #000000;
526  background-color: #ffffff;
527  }
528  td
529  {
530  background-color: #cccccc;
531  }
532  table, th, td
533  {
534  font-family:Arial, Helvetica, sans-serif;
535  border: 1px solid black;
536  text-align: right;
537  font-size: 10px;
538  }
539  """
540  df = pd.DataFrame(index=xl_labels)
541 
542  sorted_keys=sorted(xl_list[0].keys())
543 
544  for k in sorted_keys:
545  df[k] = np.array([xl[k] for xl in xl_list])
546 
547  labels = []
548  for i in range(len(xl_labels)):
549  label = df.ix[[i], :].T
550  # .to_html() is unicode; so make leading 'u' go away with str()
551  labels.append(str(label.to_html()))
552 
553  tooltip = plugins.PointHTMLTooltip(points, labels,
554  voffset=10, hoffset=10, css=css)
555  plugins.connect(fig, tooltip)
556  mpld3.save_html(fig,"output.html")
557 
558 
559  def get_residue_contacts(self,prot_listx,prot_listy):
560  for px in prot_listx:
561  for py in prot_listy:
562  indexes_x = self.index_dict[px]
563  minx = min(indexes_x)
564  maxx = max(indexes_x)
565  indexes_y = self.index_dict[py]
566  miny = min(indexes_y)
567  maxy = max(indexes_y)
568  array = self.contact_freqs[minx:maxx,miny:maxy]
569  (xresidues,yresidues)=np.where(array>0)
570  for n,xr in enumerate(xresidues):
571  print(xr,yresidues[n],px,py,array[xr,yresidues[n]])
572 
573 
574  def plot_table(self, prot_listx=None,
575  prot_listy=None,
576  no_dist_info=False,
577  confidence_info=False,
578  filter=None,
579  display_residue_pairs=False,
580  contactmap=False,
581  filename=None,
582  confidence_classes=None,
583  alphablend=0.1,
584  scale_symbol_size=1.0,
585  gap_between_components=0,
586  dictionary_of_gaps={},
587  colormap=cm.binary,
588  crosslink_threshold=None,
589  colornorm=None,
590  cbar_labels=None,
591  labels=False,
592  show_mpld3=False,
593  color_crosslinks_by_distance=True):
594  """ plot the xlink table with optional contact map.
595  prot_listx: list of protein names on the x-axis
596  prot_listy: list of protein names on the y-axis
597  no_dist_info: plot only the cross-links as grey spots
598  confidence_info:
599  filter: list of tuples to filter on. each one contains:
600  keyword in the database to be filtered on
601  relationship ">","==","<"
602  a value
603  example ("ID_Score",">",40)
604  display_residue_pairs: display all pairs defined in self.residue_pair_list
605  contactmap: display the contact map
606  filename: save to file (adds .pdf extension)
607  confidence_classes:
608  alphablend:
609  scale_symbol_size: rescale the symbol for the crosslink
610  gap_between_components: the numbeber of residues to leave blannk between each component
611  dictionary_of_gaps: add extra space after the given protein. dictionary_of_gaps={prot_name:extra_gap}
612  """
613  # prepare figure
614 
615 
616  fig = plt.figure(figsize=(100, 100))
617  ax = fig.add_subplot(111)
618  ax.set_xticks([])
619  ax.set_yticks([])
620 
621  if cbar_labels is not None:
622  if len(cbar_labels)!=4:
623  print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
624  exit()
625  # set the list of proteins on the x axis
626  if prot_listx is None:
627  prot_listx = self.sequence_dict.keys()
628  prot_listx.sort()
629  nresx = gap_between_components + \
630  sum([len(self.sequence_dict[name])
631  + gap_between_components for name in prot_listx]) + \
632  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
633 
634  # set the list of proteins on the y axis
635  if prot_listy is None:
636  prot_listy = self.sequence_dict.keys()
637  prot_listy.sort()
638  nresy = gap_between_components + \
639  sum([len(self.sequence_dict[name])
640  + gap_between_components for name in prot_listy]) + \
641  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
642 
643  # this is the residue offset for each protein
644  resoffsetx = {}
645  resendx = {}
646  res = gap_between_components
647  for prot in prot_listx:
648  resoffsetx[prot] = res
649  res += len(self.sequence_dict[prot])
650  resendx[prot] = res
651  res += gap_between_components
652  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
653 
654  resoffsety = {}
655  resendy = {}
656  res = gap_between_components
657  for prot in prot_listy:
658  resoffsety[prot] = res
659  res += len(self.sequence_dict[prot])
660  resendy[prot] = res
661  res += gap_between_components
662  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
663 
664  resoffsetdiagonal = {}
665  res = gap_between_components
666  for prot in IMP.pmi.io.utilities.OrderedSet(prot_listx + prot_listy):
667  resoffsetdiagonal[prot] = res
668  res += len(self.sequence_dict[prot])
669  res += gap_between_components
670  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
671 
672  # plot protein boundaries
673  xticks = []
674  xlabels = []
675  for n, prot in enumerate(prot_listx):
676  res = resoffsetx[prot]
677  end = resendx[prot]
678  for proty in prot_listy:
679  resy = resoffsety[proty]
680  endy = resendy[proty]
681  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
682  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
683  xticks.append((float(res) + float(end)) / 2)
684  xlabels.append(prot)
685 
686  yticks = []
687  ylabels = []
688  for n, prot in enumerate(prot_listy):
689  res = resoffsety[prot]
690  end = resendy[prot]
691  for protx in prot_listx:
692  resx = resoffsetx[protx]
693  endx = resendx[protx]
694  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
695  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
696  yticks.append((float(res) + float(end)) / 2)
697  ylabels.append(prot)
698 
699  # plot the contact map
700  if contactmap:
701  tmp_array = np.zeros((nresx, nresy))
702  for px in prot_listx:
703  for py in prot_listy:
704  resx = resoffsetx[px]
705  lengx = resendx[px] - 1
706  resy = resoffsety[py]
707  lengy = resendy[py] - 1
708  indexes_x = self.index_dict[px]
709  minx = min(indexes_x)
710  maxx = max(indexes_x)
711  indexes_y = self.index_dict[py]
712  miny = min(indexes_y)
713  maxy = max(indexes_y)
714  tmp_array[resx:lengx,resy:lengy] = self.contact_freqs[minx:maxx,miny:maxy]
715 
716  cax = ax.imshow(tmp_array,
717  cmap=colormap,
718  norm=colornorm,
719  origin='lower',
720  alpha=0.6,
721  interpolation='nearest')
722 
723  ax.set_xticks(xticks)
724  ax.set_xticklabels(xlabels, rotation=90)
725  ax.set_yticks(yticks)
726  ax.set_yticklabels(ylabels)
727  plt.setp(ax.get_xticklabels(), fontsize=6)
728  plt.setp(ax.get_yticklabels(), fontsize=6)
729 
730  # set the crosslinks
731  already_added_xls = []
732  xl_coordinates_tuple_list = []
733  xl_labels = []
734  x_list=[]
735  y_list=[]
736  color_list=[]
737  xl_list=[]
738 
739  markersize = 5 * scale_symbol_size
740  if self.cross_link_db:
741  for xl in self.cross_link_db:
742 
743  (c1,c2,r1,r2)=IMP.pmi.io.crosslink._ProteinsResiduesArray(xl)
744  label=xl[self.cross_link_db.unique_sub_id_key]
745  if color_crosslinks_by_distance:
746 
747  try:
748  mdist=self._get_distance(r1,c1,r2,c2)
749  if mdist is None: continue
750  color = self._colormap_distance(mdist,threshold=crosslink_threshold)
751  except KeyError:
752  color="gray"
753 
754  else:
755 
756  try:
757  ps=self._get_percentage_satisfaction(r1,c1,r2,c2)
758  if ps is None: continue
759  color = self._colormap_satisfaction(ps,threshold=0.2,tolerance=0.1)
760  except KeyError:
761  color="gray"
762 
763  try:
764  pos1 = r1 + resoffsetx[c1]
765  except:
766  continue
767  try:
768  pos2 = r2 + resoffsety[c2]
769  except:
770  continue
771 
772 
773  # everything below is used for plotting the diagonal
774  # when you have a rectangolar plots
775  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
776  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
777  if confidence_info:
778  if confidence == '0.01':
779  markersize = 14 * scale_symbol_size
780  elif confidence == '0.05':
781  markersize = 9 * scale_symbol_size
782  elif confidence == '0.1':
783  markersize = 6 * scale_symbol_size
784  else:
785  markersize = 15 * scale_symbol_size
786  else:
787  markersize = 5 * scale_symbol_size
788  '''
789  ax.plot([pos1],
790  [pos2],
791  'o',
792  c=color,
793  alpha=alphablend,
794  markersize=markersize)
795 
796  ax.plot([pos2],
797  [pos1],
798  'o',
799  c=color,
800  alpha=alphablend,
801  markersize=markersize)
802  '''
803  x_list.append(pos1)
804  x_list.append(pos2)
805  y_list.append(pos2)
806  y_list.append(pos1)
807  color_list.append(color)
808  color_list.append(color)
809  xl["Distance"]=mdist
810  xl_list.append(xl)
811  xl_list.append(xl)
812 
813  xl_labels.append(label)
814  xl_coordinates_tuple_list.append((float(pos1),float(pos2)))
815  xl_labels.append(label+"*")
816  xl_coordinates_tuple_list.append((float(pos2),float(pos1)))
817 
818  points=ax.scatter(x_list,y_list,s=markersize,c=color_list,alpha=alphablend)
819 
820  # plot requested residue pairs
821  if display_residue_pairs:
822  for rp in self.residue_pair_list:
823  r1=rp[0]
824  c1=rp[1]
825  r2=rp[2]
826  c2=rp[3]
827 
828  try:
829  dist=self._get_distance(r1,c1,r2,c2)
830  except:
831  continue
832 
833  if dist<=40.0:
834  print(rp)
835  try:
836  pos1 = r1 + resoffsetx[c1]
837  except:
838  continue
839  try:
840  pos2 = r2 + resoffsety[c2]
841  except:
842  continue
843 
844  ax.plot([pos1],
845  [pos2],
846  '+',
847  c="blue",
848  alpha=0.1,
849  markersize=markersize)
850 
851  # display and write to file
852  fig.set_size_inches(0.002 * nresx, 0.002 * nresy)
853  [i.set_linewidth(2.0) for i in ax.spines.itervalues()]
854  if cbar_labels is not None:
855  cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
856  cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
857 
858  # set the labels using spring layout
859  if labels:
860  points=self.spring_layout(ax,plt,xl_coordinates_tuple_list, xl_labels)
861 
862  if filename:
863  #plt.show()
864  plt.savefig(filename, dpi=300,transparent="False")
865 
866 
867  if show_mpld3:
868  self.show_mpld3(fig,ax,points,xl_list,xl_labels)
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
Utility classes and functions for IO.
Definition: utilities.py:1
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
Upper bound harmonic function (non-zero when feature > mean)
class to read, analyze, and plot xlink data on contact maps Canonical way to read the data: 1) load s...
Definition: xltable.py:28
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Utility classes and functions for reading and storing PMI files.
def plot_table
plot the xlink table with optional contact map.
Definition: xltable.py:574
def load_rmf_coordinates
read coordinates from a rmf file.
Definition: xltable.py:193
Return all close unordered pairs of particles taken from the SingletonContainer.
def load_pdb_coordinates
read coordinates from a pdb file.
Definition: xltable.py:155
Distance restraint between two particles.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Object used to hold a set of restraints.
Definition: RestraintSet.h:36
def setup_contact_map
loop through each distance map and get frequency of contacts
Definition: xltable.py:331
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
def load_sequence_from_fasta_file
read sequence.
Definition: xltable.py:139
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
def set_residue_pairs_to_display
select the atom names of residue pairs to plot on the contact map list of residues types must be sing...
Definition: xltable.py:316
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Store a list of ParticleIndexes.
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
def load_crosslinks
read crosslinks from a CSV file.
Definition: xltable.py:283
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
Select all CA ATOM records.
Definition: pdb.h:77
static const FloatKeys & get_xyz_keys()
Get a vector containing the keys for x,y,z.
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
Applies a list of movers one at a time.
Definition: SerialMover.h:23
Applies a PairScore to each Pair in a list.
Perform more efficient close pair finding when rigid bodies are involved.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27