IMP logo
IMP Reference Guide  2.17.0
The Integrative Modeling Platform
xltable.py
1 """@namespace IMP.pmi1.io.xltable
2  Tools to plot a contact map overlaid with cross-links.
3 """
4 
5 from __future__ import print_function
6 from math import sqrt
7 from scipy.spatial.distance import cdist
8 import numpy as np
9 import matplotlib as mpl
10 mpl.use('Agg')
11 import matplotlib.cm as cm
12 import matplotlib.pyplot as plt
13 
14 from collections import defaultdict
15 import pickle
16 import IMP
17 import IMP.atom
18 import IMP.rmf
19 import RMF
20 import IMP.pmi1
21 import IMP.pmi1.io
24 import IMP.pmi1.topology
25 import IMP.pmi1.analysis
26 import operator
27 
28 @IMP.deprecated_object("2.8", "Use PMI2, IMP.pmi1.macros.AnalysisReplicaExchange, instead")
29 class XLTable():
30  """ class to read, analyze, and plot xlink data on contact maps
31  Canonical way to read the data:
32  1) load sequences and name them
33  2) load coordinates for those sequences from PDB file
34  3) add crosslinks
35  4) create contact map
36  5) plot
37  """
38 
39  def __init__(self,contact_threshold):
40  self.sequence_dict={}
41  self.cross_link_db = None
42  self.residue_pair_list = [] # list of special residue pairs to display
43  self.distance_maps = [] # distance map for each copy of the complex
44  self.contact_freqs = None
45  self.num_pdbs = 0
46  self.num_rmfs = 0
47  self.index_dict = defaultdict(list) # location in the dmap of each residue
48  self.contact_threshold = contact_threshold
49  # internal things
50  self._first = True
51  self.index_dict={}
52  self.stored_dists={}
53  self.model = IMP.Model()
54 
55  @property
56  @IMP.deprecated_method("3.0", "Model should be accessed with `.model`.")
57  def mdl(self):
58  return self.model
59 
60  def _colormap_distance(self, dist, threshold=35, tolerance=0):
61  if dist < threshold - tolerance:
62  return "Green"
63  elif dist >= threshold + tolerance:
64  return "Orange"
65  else:
66  return "Red"
67 
68  def _colormap_satisfaction(self, sat, threshold=0.5, tolerance=0.1):
69  if sat >= threshold + tolerance:
70  print(sat, "green")
71  return "Green"
72  elif sat < threshold + tolerance and sat >= threshold - tolerance :
73  print(sat, "orange")
74  return "Orange"
75  else:
76  print(sat, "orange")
77  return "Orange"
78 
79  def _get_percentage_satisfaction(self,r1,c1,r2,c2,threshold=35):
80  try:
81  idx1=self.index_dict[c1][r1]
82  except:
83  return None
84  try:
85  idx2=self.index_dict[c2][r2]
86  except:
87  return None
88  nsatisfied=0
89  for dists in self.dist_maps:
90  dist=dists[idx1,idx2]
91  if dist<threshold: nsatisfied+=1
92  return float(nsatisfied)/len(self.dist_maps)
93 
94  def _get_distance(self,r1,c1,r2,c2):
95  if self.index_dict is not None:
96  try:
97  idx1=self.index_dict[c1][r1]
98  except:
99  return None
100  try:
101  idx2=self.index_dict[c2][r2]
102  except:
103  return None
104  return self.av_dist_map[idx1,idx2]
105  else:
106  if (r1,c1,r2,c2) not in self.stored_dists.keys():
107  sel=IMP.atom.Selection(self.prots,molecule=c1,residue_index=r1)
108  selpart=sel.get_selected_particles()
109  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
110  if len(selpart_res_one)>1: return None
111  if len(selpart_res_one)==0: return None
112  selpart_res_one_1=selpart_res_one[0]
113  sel=IMP.atom.Selection(self.prots,molecule=c2,residue_index=r2)
114  selpart=sel.get_selected_particles()
115  selpart_res_one=list(set(self.particles_resolution_one) & set(selpart))
116  if len(selpart_res_one)>1: return None
117  if len(selpart_res_one)==0: return None
118  selpart_res_one_2=selpart_res_one[0]
119  d1=IMP.core.XYZ(selpart_res_one_1)
120  d2=IMP.core.XYZ(selpart_res_one_2)
121  dist=IMP.core.get_distance(d1,d2)
122  self.stored_dists[(r1,c1,r2,c2)]=dist
123  else:
124  dist=self.stored_dists[(r1,c1,r2,c2)]
125  return dist
126 
127 
128 
129 
130  def _internal_load_maps(self,maps_fn):
131  npzfile = np.load(maps_fn)
132  cname_array=npzfile['cname_array']
133  idx_array=npzfile['idx_array']
134  index_dict={}
135  for cname,idxs in zip(cname_array,idx_array):
136  tmp=list(idxs)
137  if -1 in tmp:
138  index_dict[cname]=tmp[0:tmp.index(-1)]
139  else:
140  index_dict[cname]=tmp
141  av_dist_map = npzfile['av_dist_map']
142  contact_map = npzfile['contact_map']
143  return index_dict,av_dist_map,contact_map
144 
145  def load_sequence_from_fasta_file(self,fasta_file,id_in_fasta_file,protein_name):
146  """ read sequence. structures are displayed in the same order as sequences are read.
147  fasta_file: file to read
148  id_in_fasta_file: id of desired sequence
149  protein_name: identifier for this sequence (use same name when handling coordinates)
150  can provide the fasta name (for retrieval) and the protein name (for storage) """
151 
152  record_dict = IMP.pmi1.topology.Sequences(fasta_file)
153  if id_in_fasta_file is None:
154  id_in_fasta_file = name
155  if id_in_fasta_file not in record_dict:
156  raise KeyError("id %s not found in fasta file" % id_in_fasta_file)
157  length = len(record_dict[id_in_fasta_file])
158 
159  self.sequence_dict[protein_name] = str(record_dict[id_in_fasta_file])
160 
161  def load_pdb_coordinates(self,pdbfile,chain_to_name_map):
162  """ read coordinates from a pdb file. also appends to distance maps
163  @param pdbfile file for reading coords
164  @param chain_to_name_map correspond chain ID with protein name (will ONLY read these chains)
165  Key: PDB chain ID. Value: Protein name (set in sequence reading)
166  \note This function returns an error if the sequence for each chain has NOT been read
167  """
168  mh = IMP.atom.read_pdb(pdbfile,self.model,IMP.atom.CAlphaPDBSelector())
169  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
170  coords = np.ones((total_len,3)) * 1e5 #default to coords "very far away"
171  prev_stop = 0
172  for cid in chain_to_name_map:
173  cname = chain_to_name_map[cid]
174  if cname not in self.sequence_dict:
175  print("ERROR: chain",cname,'has not been read or has a naming mismatch')
176  return
177  if self._first:
178  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
179  sel = IMP.atom.Selection(mh,chain_id=cid)
180  for p in sel.get_selected_particles():
182  coords[rnum+prev_stop-1,:] = list(IMP.core.XYZ(p).get_coordinates())
183  prev_stop+=len(self.sequence_dict[cname])
184  dists = cdist(coords, coords)
185  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
186  if self._first:
187  self.dist_maps= [dists]
188  self.av_dist_map = dists
189  self.contact_freqs = binary_dists
190  self._first=False
191  else:
192  self.dist_maps.append(dists)
193  self.av_dist_map += dists
194  self.contact_freqs += binary_dists
195  IMP.atom.destroy(mh)
196  del mh
197  self.num_pdbs+=1
198 
199  def load_rmf_coordinates(self,rmf_name,rmf_frame_index, chain_names, nomap=False):
200  """ read coordinates from a rmf file. It needs IMP to run.
201  rmf has been created using IMP.pmi1 conventions. It gets the
202  highest resolution automatically. Also appends to distance maps
203  @param rmf_name file for reading coords
204  @param rmf_frame_index frame index from the rmf
205  @param nomap Default False, if True it will not calculate the contact map
206  """
207  (particles_resolution_one, prots)=self._get_rmf_structure(rmf_name,rmf_frame_index)
208 
209  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
210  print(self.sequence_dict,total_len)
211 
212  coords = np.ones((total_len,3)) * 1e6 #default to coords "very far away"
213  prev_stop=0
214  sorted_particles=IMP.pmi1.tools.sort_by_residues(particles_resolution_one)
215 
216  self.prots=prots
217  self.particles_resolution_one=particles_resolution_one
218 
219  if nomap:
220  return
221 
222  for cname in chain_names:
223  print(cname)
224  if self._first:
225  self.index_dict[cname]=range(prev_stop,prev_stop+len(self.sequence_dict[cname]))
226  rindexes=range(1,len(self.sequence_dict[cname])+1)
227  for rnum in rindexes:
228  sel=IMP.atom.Selection(prots,molecule=cname,residue_index=rnum)
229  selpart=sel.get_selected_particles()
230  selpart_res_one=list(set(particles_resolution_one) & set(selpart))
231  if len(selpart_res_one)>1: continue
232  if len(selpart_res_one)==0: continue
233  selpart_res_one=selpart_res_one[0]
234  try:
235  coords[rnum+prev_stop-1,:]=IMP.core.XYZ(selpart_res_one).get_coordinates()
236  except IndexError:
237  print("Error: exceed max size",prev_stop,total_len,cname,rnum)
238  exit()
239  prev_stop+=len(self.sequence_dict[cname])
240  dists = cdist(coords, coords)
241  binary_dists = np.where((dists <= self.contact_threshold) & (dists >= 1.0), 1.0, 0.0)
242  if self._first:
243  self.dist_maps= [dists]
244  self.av_dist_map = dists
245  self.contact_freqs = binary_dists
246  self._first=False
247  else:
248  self.dist_maps.append(dists)
249  self.av_dist_map += dists
250  self.contact_freqs += binary_dists
251  self.num_rmfs+=1
252 
253 
254  def _get_rmf_structure(self,rmf_name,rmf_frame_index):
255  rh= RMF.open_rmf_file_read_only(rmf_name)
256  prots=IMP.rmf.create_hierarchies(rh, self.model)
257  IMP.rmf.load_frame(rh, rmf_frame_index)
258  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
259  del rh
260 
262 
263  protein_names=particle_dict.keys()
264  particles_resolution_one=[]
265  for k in particle_dict:
266  particles_resolution_one+=(particle_dict[k])
267 
268  return particles_resolution_one, prots
269 
270 
271  def save_maps(self,maps_fn):
272  maxlen=max(len(self.index_dict[key]) for key in self.index_dict)
273  cnames=[]
274  idxs=[]
275  for cname,idx in self.index_dict.items():
276  cnames.append(cname)
277  idxs.append(idx+[-1]*(maxlen-len(idx)))
278  idx_array=np.array(idxs)
279  cname_array=np.array(cnames)
280  np.savez(maps_fn,
281  cname_array=cname_array,
282  idx_array=idx_array,
283  av_dist_map=self.av_dist_map,
284  contact_map=self.contact_freqs)
285 
286  def load_maps(self,maps_fn):
287  self.index_dict,self.av_dist_map,self.contact_freqs=self._internal_load_maps(maps_fn)
288 
289  def load_crosslinks(self,CrossLinkDataBase,distance_field=None):
290  """ read crosslinks from a CSV file.
291  provide a CrossLinkDataBaseKeywordsConverter to explain the columns
292  @distance_field is the optional keyword for the distance to be read form the file.
293  This can skip the rmf reading to calculate the distance of cross-links if
294  already provided in the csv file."""
295  if type(CrossLinkDataBase) is not IMP.pmi1.io.crosslink.CrossLinkDataBase:
296  raise TypeError("Crosslink database must be a IMP.pmi1.io.CrossLinkDataBase type")
297  self.cross_link_db=CrossLinkDataBase
298  if distance_field is not None:
299  total_len = sum(len(self.sequence_dict[s]) for s in self.sequence_dict)
300  zeros = np.zeros((total_len,3))
301  self.av_dist_map = cdist(zeros,zeros)
302  for xl in self.cross_link_db:
303  c1=xl[self.cross_link_db.protein1_key]
304  c2=xl[self.cross_link_db.protein2_key]
305  r1=xl[self.cross_link_db.residue1_key]
306  r2=xl[self.cross_link_db.residue2_key]
307  try:
308  self.stored_dists[(r1,c1,r2,c2)]=float(xl[distance_field])
309  self.stored_dists[(r2,c2,r1,c1)]=float(xl[distance_field])
310  except ValueError:
311  self.stored_dists[(r1,c1,r2,c2)]=10e6
312  self.stored_dists[(r2,c2,r1,c1)]=10e6
313  else:
314  for xl in self.cross_link_db:
315  c1=xl[self.cross_link_db.protein1_key]
316  c2=xl[self.cross_link_db.protein2_key]
317  r1=xl[self.cross_link_db.residue1_key]
318  r2=xl[self.cross_link_db.residue2_key]
319  self.stored_dists[(r1,c1,r2,c2)]=self._get_distance(r1,c1,r2,c2)
320  self.stored_dists[(r2,c2,r1,c1)]=self._get_distance(r2,c2,r1,c1)
321 
322  def set_residue_pairs_to_display(self,residue_type_pair):
323  """ select the atom names of residue pairs to plot on the contact map
324  list of residues types must be single letter code
325  e.g. residue_type_pair=("K","K")
326  """
327  rtp=sorted(residue_type_pair)
328  for prot1 in self.sequence_dict:
329  seq1=self.sequence_dict[prot1]
330  for nres1,res1 in enumerate(seq1):
331  for prot2 in self.sequence_dict:
332  seq2=self.sequence_dict[prot2]
333  for nres2,res2 in enumerate(seq2):
334  if sorted((res1,res2))==rtp:
335  self.residue_pair_list.append((nres1+1,prot1,nres2+1,prot2))
336 
337  def setup_contact_map(self):
338  """ loop through each distance map and get frequency of contacts
339  """
340  if self.num_pdbs!=0 and self.num_rmfs==0:
341  self.av_dist_map = 1.0/self.num_pdbs * self.av_dist_map
342  self.contact_freqs = 1.0/self.num_pdbs * self.contact_freqs
343  if self.num_pdbs==0 and self.num_rmfs!=0:
344  self.av_dist_map = 1.0/self.num_rmfs * self.av_dist_map
345  self.contact_freqs = 1.0/self.num_rmfs * self.contact_freqs
346 
347  def setup_difference_map(self,maps_fn1,maps_fn2,thresh):
348  idx1,av1,contact1=self._internal_load_maps(maps_fn1)
349  idx2,av2,contact2=self._internal_load_maps(maps_fn2)
350  if idx1!=idx2:
351  print("UH OH: index dictionaries do not match!")
352  exit()
353  self.index_dict=idx1
354  self.av_dist_map=av1 # should we store both somehow? only needed for XL
355 
356  def logic(c1,c2):
357  if c1==0 and c2==0: # white
358  return 0
359  elif c1>thresh and c2<thresh: # red
360  return 1
361  elif c1<thresh and c2>thresh: # blue
362  return 2
363  else: # green
364  return 3
365  f = np.vectorize(logic,otypes=[np.int])
366  print('computing contact map')
367  self.contact_freqs = f(contact1,contact2)
368  print('done')
369 
370 
371 
372  def spring_layout(self,ax,plt,data, annotations, iterations = 100, k=1):
373 
374  import networkx as nx
375  import numpy
376  import scipy.spatial
377 
378 
379  #import matplotlib.pyplot as plt
380 
381  """
382  Author: G. Bouvier, Pasteur Institute, Paris
383  Website: http://bloggb.fr/2015/10/19/spring_layout_for_annotating_plot_in_matplotlib.html
384  - data: coordinates of your points [(x1,y1), (x2,y2), ..., (xn,yn)]
385  - annotations: text for your annotation ['str_1', 'str_2', ..., 'str_n']
386  - iterations: number of iterations for spring layout
387  - k: optimal distance between nodes
388  """
389  G = nx.Graph()
390  init_pos = {} # initial position
391  x, y = [e[0] for e in data], [e[1] for e in data]
392  xmin, xmax = min(x), max(x)
393  ymin, ymax = min(y), max(y)
394 
395  m=IMP.Model()
396  particles={}
397  particles_fixed={}
398  mvs = []
399 
400  rs = IMP.RestraintSet(m, 'distance')
401 
402  for i,xy in enumerate(data):
403  p=IMP.Particle(m)
404  G.add_node(i)
405  init_pos[i] = xy
407  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
408  IMP.core.XYZR(p).set_radius(100)
409  IMP.core.XYZ(p).set_coordinates_are_optimized(True)
410  particles[i]=p
411  Xfloatkey = IMP.core.XYZ.get_xyz_keys()[0]
412  Yfloatkey = IMP.core.XYZ.get_xyz_keys()[1]
413  mvs.append(IMP.core.NormalMover([p], [Xfloatkey], 5))
414  mvs.append(IMP.core.NormalMover([p], [Yfloatkey], 5))
415  p=IMP.Particle(m)
417  IMP.core.XYZ(p).set_coordinates((xy[0],xy[1],0))
418  IMP.core.XYZR(p).set_radius(100)
419  particles_fixed[i]=p
420  ts1 = IMP.core.HarmonicUpperBound(200, 0.001)
421  rs.add_restraint(
423  particles[i],
424  particles_fixed[i]))
425 
426  ssps = IMP.core.SoftSpherePairScore(1.0)
428 
429  ps = particles.values()+particles_fixed.values()
430 
431  lsa.add(IMP.get_indexes(ps))
432 
434  cpc = IMP.container.ClosePairContainer(lsa, 0.0, rbcpf, 10.0)
435  evr = IMP.container.PairsRestraint(ssps, cpc)
436  rs.add_restraint(evr)
437 
438  '''
439  #G.add_node(text)
440  #G.add_edge(xy, text)
441  #init_pos[text] = xy
442 
443  delTri = scipy.spatial.Delaunay(data)
444  #plt.scatter([e[0] for e in data], [e[1] for e in data])
445  # create a set for edges that are indexes of the points
446  edges = set()
447  # for each Delaunay triangle
448  for n in xrange(delTri.nsimplex):
449  # for each edge of the triangle
450  # sort the vertices
451  # (sorting avoids duplicated edges being added to the set)
452  # and add to the edges set
453  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,1]])
454  edges.add((edge[0], edge[1]))
455  edge = sorted([delTri.vertices[n,0], delTri.vertices[n,2]])
456  edges.add((edge[0], edge[1]))
457  edge = sorted([delTri.vertices[n,1], delTri.vertices[n,2]])
458  edges.add((edge[0], edge[1]))
459 
460 
461  for e in edges:
462  G.add_edge(e[0],e[1])
463 
464  d1=IMP.core.XYZ(particles[e[0]])
465  d2=IMP.core.XYZ(particles[e[1]])
466  dist=IMP.core.get_distance(d1,d2)
467  ts1 = IMP.core.Harmonic(dist+150, 0.001)
468  rs.add_restraint(
469  IMP.core.DistanceRestraint(m, ts1,
470  particles[e[0]],
471  particles[e[1]]))
472  '''
473 
474  smv = IMP.core.SerialMover(mvs)
475 
476  mc = IMP.core.MonteCarlo(m)
477  mc.set_scoring_function(rs)
478  mc.set_return_best(False)
479  mc.set_kt(1.0)
480  mc.add_mover(smv)
481 
482 
483  '''
484  pos = nx.spring_layout(G ,pos=init_pos)
485  x_list=[]
486  y_list=[]
487  for i,xy in enumerate(data):
488  xynew = pos[i] * [xmax-xmin, ymax-ymin] + [xmin, ymin]
489  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
490  x_list.append(xynew[0])
491  y_list.append(xynew[1])
492  #plt.annotate(name, xy, size=5, xycoords='data', xytext=xytext, textcoords='data', bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.3),\
493  # arrowprops=dict(arrowstyle="-", connectionstyle="arc3", color='gray'))
494 
495  points=ax.scatter(x_list,y_list,alpha=1)
496  #pos*=[numpy.ptp([e[0] for e in data]), numpy.ptp([e[1] for e in data])]
497  #plt.show()
498  return points
499  '''
500 
501  for i in range(10):
502  mc.optimize(1000*len(particles.keys())*2)
503  print(rs.evaluate(False))
504  x_list=[]
505  y_list=[]
506  for i,xy in enumerate(data):
507  p=particles[i]
508  coord=IMP.core.XYZ(p).get_coordinates()
509  xynew = (coord[0],coord[1])
510  plt.plot((xy[0],xynew[0]), (xy[1],xynew[1]), 'k-')
511  x_list.append(xynew[0])
512  y_list.append(xynew[1])
513  points=ax.scatter(x_list,y_list,alpha=1,facecolors='none', edgecolors='k')
514  return points
515 
516 
517 
518  def show_mpld3(self,fig,ax,points,xl_list,xl_labels):
519  import mpld3
520  from mpld3 import plugins
521  import pandas as pd
522 
523  # Define some CSS to control our custom labels
524  css = """
525  table
526  {
527  border-collapse: collapse;
528  }
529  th
530  {
531  color: #000000;
532  background-color: #ffffff;
533  }
534  td
535  {
536  background-color: #cccccc;
537  }
538  table, th, td
539  {
540  font-family:Arial, Helvetica, sans-serif;
541  border: 1px solid black;
542  text-align: right;
543  font-size: 10px;
544  }
545  """
546  df = pd.DataFrame(index=xl_labels)
547 
548  sorted_keys=sorted(xl_list[0].keys())
549 
550  for k in sorted_keys:
551  df[k] = np.array([xl[k] for xl in xl_list])
552 
553  labels = []
554  for i in range(len(xl_labels)):
555  label = df.ix[[i], :].T
556  # .to_html() is unicode; so make leading 'u' go away with str()
557  labels.append(str(label.to_html()))
558 
559  tooltip = plugins.PointHTMLTooltip(points, labels,
560  voffset=10, hoffset=10, css=css)
561  plugins.connect(fig, tooltip)
562  mpld3.save_html(fig,"output.html")
563 
564 
565  def get_residue_contacts(self,prot_listx,prot_listy):
566  for px in prot_listx:
567  for py in prot_listy:
568  indexes_x = self.index_dict[px]
569  minx = min(indexes_x)
570  maxx = max(indexes_x)
571  indexes_y = self.index_dict[py]
572  miny = min(indexes_y)
573  maxy = max(indexes_y)
574  array = self.contact_freqs[minx:maxx,miny:maxy]
575  (xresidues,yresidues)=np.where(array>0)
576  for n,xr in enumerate(xresidues):
577  print(xr,yresidues[n],px,py,array[xr,yresidues[n]])
578 
579 
580  def plot_table(self, prot_listx=None,
581  prot_listy=None,
582  no_dist_info=False,
583  confidence_info=False,
584  filter=None,
585  display_residue_pairs=False,
586  contactmap=False,
587  filename=None,
588  confidence_classes=None,
589  alphablend=0.1,
590  scale_symbol_size=1.0,
591  gap_between_components=0,
592  dictionary_of_gaps={},
593  colormap=cm.binary,
594  crosslink_threshold=None,
595  colornorm=None,
596  cbar_labels=None,
597  labels=False,
598  show_mpld3=False,
599  color_crosslinks_by_distance=True):
600  """ plot the xlink table with optional contact map.
601  prot_listx: list of protein names on the x-axis
602  prot_listy: list of protein names on the y-axis
603  no_dist_info: plot only the cross-links as grey spots
604  confidence_info:
605  filter: list of tuples to filter on. each one contains:
606  keyword in the database to be filtered on
607  relationship ">","==","<"
608  a value
609  example ("ID_Score",">",40)
610  display_residue_pairs: display all pairs defined in self.residue_pair_list
611  contactmap: display the contact map
612  filename: save to file (adds .pdf extension)
613  confidence_classes:
614  alphablend:
615  scale_symbol_size: rescale the symbol for the crosslink
616  gap_between_components: the numbeber of residues to leave blannk between each component
617  dictionary_of_gaps: add extra space after the given protein. dictionary_of_gaps={prot_name:extra_gap}
618  """
619  # prepare figure
620 
621 
622  fig = plt.figure(figsize=(100, 100))
623  ax = fig.add_subplot(111)
624  ax.set_xticks([])
625  ax.set_yticks([])
626 
627  if cbar_labels is not None:
628  if len(cbar_labels)!=4:
629  print("to provide cbar labels, give 3 fields (first=first input file, last=last input) in oppose order of input contact maps")
630  exit()
631  # set the list of proteins on the x axis
632  if prot_listx is None:
633  prot_listx = self.sequence_dict.keys()
634  prot_listx.sort()
635  nresx = gap_between_components + \
636  sum([len(self.sequence_dict[name])
637  + gap_between_components for name in prot_listx]) + \
638  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
639 
640  # set the list of proteins on the y axis
641  if prot_listy is None:
642  prot_listy = self.sequence_dict.keys()
643  prot_listy.sort()
644  nresy = gap_between_components + \
645  sum([len(self.sequence_dict[name])
646  + gap_between_components for name in prot_listy]) + \
647  sum([dictionary_of_gaps[prot] for prot in dictionary_of_gaps.keys()])
648 
649  # this is the residue offset for each protein
650  resoffsetx = {}
651  resendx = {}
652  res = gap_between_components
653  for prot in prot_listx:
654  resoffsetx[prot] = res
655  res += len(self.sequence_dict[prot])
656  resendx[prot] = res
657  res += gap_between_components
658  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
659 
660  resoffsety = {}
661  resendy = {}
662  res = gap_between_components
663  for prot in prot_listy:
664  resoffsety[prot] = res
665  res += len(self.sequence_dict[prot])
666  resendy[prot] = res
667  res += gap_between_components
668  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
669 
670  resoffsetdiagonal = {}
671  res = gap_between_components
672  for prot in IMP.pmi1.io.utilities.OrderedSet(prot_listx + prot_listy):
673  resoffsetdiagonal[prot] = res
674  res += len(self.sequence_dict[prot])
675  res += gap_between_components
676  if prot in dictionary_of_gaps.keys(): res+= dictionary_of_gaps[prot]
677 
678  # plot protein boundaries
679  xticks = []
680  xlabels = []
681  for n, prot in enumerate(prot_listx):
682  res = resoffsetx[prot]
683  end = resendx[prot]
684  for proty in prot_listy:
685  resy = resoffsety[proty]
686  endy = resendy[proty]
687  ax.plot([res, res], [resy, endy], linestyle='-',color='gray', lw=0.4)
688  ax.plot([end, end], [resy, endy], linestyle='-',color='gray', lw=0.4)
689  xticks.append((float(res) + float(end)) / 2)
690  xlabels.append(prot)
691 
692  yticks = []
693  ylabels = []
694  for n, prot in enumerate(prot_listy):
695  res = resoffsety[prot]
696  end = resendy[prot]
697  for protx in prot_listx:
698  resx = resoffsetx[protx]
699  endx = resendx[protx]
700  ax.plot([resx, endx], [res, res], linestyle='-',color='gray', lw=0.4)
701  ax.plot([resx, endx], [end, end], linestyle='-',color='gray', lw=0.4)
702  yticks.append((float(res) + float(end)) / 2)
703  ylabels.append(prot)
704 
705  # plot the contact map
706  if contactmap:
707  tmp_array = np.zeros((nresx, nresy))
708  for px in prot_listx:
709  for py in prot_listy:
710  resx = resoffsetx[px]
711  lengx = resendx[px] - 1
712  resy = resoffsety[py]
713  lengy = resendy[py] - 1
714  indexes_x = self.index_dict[px]
715  minx = min(indexes_x)
716  maxx = max(indexes_x)
717  indexes_y = self.index_dict[py]
718  miny = min(indexes_y)
719  maxy = max(indexes_y)
720  tmp_array[resx:lengx,resy:lengy] = self.contact_freqs[minx:maxx,miny:maxy]
721 
722  cax = ax.imshow(tmp_array,
723  cmap=colormap,
724  norm=colornorm,
725  origin='lower',
726  alpha=0.6,
727  interpolation='nearest')
728 
729  ax.set_xticks(xticks)
730  ax.set_xticklabels(xlabels, rotation=90)
731  ax.set_yticks(yticks)
732  ax.set_yticklabels(ylabels)
733  plt.setp(ax.get_xticklabels(), fontsize=6)
734  plt.setp(ax.get_yticklabels(), fontsize=6)
735 
736  # set the crosslinks
737  already_added_xls = []
738  xl_coordinates_tuple_list = []
739  xl_labels = []
740  x_list=[]
741  y_list=[]
742  color_list=[]
743  xl_list=[]
744 
745  markersize = 5 * scale_symbol_size
746  if self.cross_link_db:
747  for xl in self.cross_link_db:
748 
749  (c1,c2,r1,r2)=IMP.pmi1.io.crosslink._ProteinsResiduesArray(xl)
750  label=xl[self.cross_link_db.unique_sub_id_key]
751  if color_crosslinks_by_distance:
752 
753  try:
754  mdist=self._get_distance(r1,c1,r2,c2)
755  if mdist is None: continue
756  color = self._colormap_distance(mdist,threshold=crosslink_threshold)
757  except KeyError:
758  color="gray"
759 
760  else:
761 
762  try:
763  ps=self._get_percentage_satisfaction(r1,c1,r2,c2)
764  if ps is None: continue
765  color = self._colormap_satisfaction(ps,threshold=0.2,tolerance=0.1)
766  except KeyError:
767  color="gray"
768 
769  try:
770  pos1 = r1 + resoffsetx[c1]
771  except:
772  continue
773  try:
774  pos2 = r2 + resoffsety[c2]
775  except:
776  continue
777 
778 
779  # everything below is used for plotting the diagonal
780  # when you have a rectangolar plots
781  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
782  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
783  if confidence_info:
784  if confidence == '0.01':
785  markersize = 14 * scale_symbol_size
786  elif confidence == '0.05':
787  markersize = 9 * scale_symbol_size
788  elif confidence == '0.1':
789  markersize = 6 * scale_symbol_size
790  else:
791  markersize = 15 * scale_symbol_size
792  else:
793  markersize = 5 * scale_symbol_size
794  '''
795  ax.plot([pos1],
796  [pos2],
797  'o',
798  c=color,
799  alpha=alphablend,
800  markersize=markersize)
801 
802  ax.plot([pos2],
803  [pos1],
804  'o',
805  c=color,
806  alpha=alphablend,
807  markersize=markersize)
808  '''
809  x_list.append(pos1)
810  x_list.append(pos2)
811  y_list.append(pos2)
812  y_list.append(pos1)
813  color_list.append(color)
814  color_list.append(color)
815  xl["Distance"]=mdist
816  xl_list.append(xl)
817  xl_list.append(xl)
818 
819  xl_labels.append(label)
820  xl_coordinates_tuple_list.append((float(pos1),float(pos2)))
821  xl_labels.append(label+"*")
822  xl_coordinates_tuple_list.append((float(pos2),float(pos1)))
823 
824  points=ax.scatter(x_list,y_list,s=markersize,c=color_list,alpha=alphablend)
825 
826  # plot requested residue pairs
827  if display_residue_pairs:
828  for rp in self.residue_pair_list:
829  r1=rp[0]
830  c1=rp[1]
831  r2=rp[2]
832  c2=rp[3]
833 
834  try:
835  dist=self._get_distance(r1,c1,r2,c2)
836  except:
837  continue
838 
839  if dist<=40.0:
840  print(rp)
841  try:
842  pos1 = r1 + resoffsetx[c1]
843  except:
844  continue
845  try:
846  pos2 = r2 + resoffsety[c2]
847  except:
848  continue
849 
850  ax.plot([pos1],
851  [pos2],
852  '+',
853  c="blue",
854  alpha=0.1,
855  markersize=markersize)
856 
857  # display and write to file
858  fig.set_size_inches(0.002 * nresx, 0.002 * nresy)
859  [i.set_linewidth(2.0) for i in ax.spines.values()]
860  if cbar_labels is not None:
861  cbar = fig.colorbar(cax, ticks=[0.5,1.5,2.5,3.5])
862  cbar.ax.set_yticklabels(cbar_labels)# vertically oriented colorbar
863 
864  # set the labels using spring layout
865  if labels:
866  points=self.spring_layout(ax,plt,xl_coordinates_tuple_list, xl_labels)
867 
868  if filename:
869  #plt.show()
870  plt.savefig(filename, dpi=300,transparent="False")
871 
872 
873  if show_mpld3:
874  self.show_mpld3(fig,ax,points,xl_list,xl_labels)
Utility classes and functions for IO.
Definition: utilities.py:1
A Monte Carlo optimizer.
Definition: MonteCarlo.h:45
def load_sequence_from_fasta_file
read sequence.
Definition: xltable.py:145
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def plot_table
plot the xlink table with optional contact map.
Definition: xltable.py:580
def set_residue_pairs_to_display
select the atom names of residue pairs to plot on the contact map list of residues types must be sing...
Definition: xltable.py:322
Upper bound harmonic function (non-zero when feature > mean)
static XYZR setup_particle(Model *m, ParticleIndex pi)
Definition: XYZR.h:48
Legacy PMI1 module to represent, score, sample and analyze models.
Set of python classes to create a multi-state, multi-resolution IMP hierarchy.
Utility classes and functions for reading and storing PMI files.
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
Return all close unordered pairs of particles taken from the SingletonContainer.
Distance restraint between two particles.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
Definition: XYZR.h:89
void read_pdb(TextInput input, int model, Hierarchy h)
Object used to hold a set of restraints.
Definition: RestraintSet.h:37
Tools for clustering and cluster analysis.
Definition: pmi1/Analysis.py:1
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:73
ParticleIndexPairs get_indexes(const ParticlePairsTemp &ps)
Get the indexes from a list of particle pairs.
A dictionary-like wrapper for reading and storing sequence data.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
Store a list of ParticleIndexes.
def deprecated_method
Python decorator to mark a method as deprecated.
Definition: __init__.py:10610
A decorator for a particle representing an atom.
Definition: atom/Atom.h:234
def load_crosslinks
read crosslinks from a CSV file.
Definition: xltable.py:289
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
Modify a set of continuous variables using a normal distribution.
Definition: NormalMover.h:20
def load_rmf_coordinates
read coordinates from a rmf file.
Definition: xltable.py:199
def deprecated_object
Python decorator to mark a class as deprecated.
Definition: __init__.py:10594
def load_pdb_coordinates
read coordinates from a pdb file.
Definition: xltable.py:161
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Class to handle individual particles of a Model object.
Definition: Particle.h:41
class to read, analyze, and plot xlink data on contact maps Canonical way to read the data: 1) load s...
Definition: xltable.py:28
Select all CA ATOM records.
Definition: pdb.h:77
static const FloatKeys & get_xyz_keys()
Get a vector containing the keys for x,y,z.
def setup_contact_map
loop through each distance map and get frequency of contacts
Definition: xltable.py:337
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Support for the RMF file format for storing hierarchical molecular data and markup.
Applies a list of movers one at a time.
Definition: SerialMover.h:23
Applies a PairScore to each Pair in a list.
Perform more efficient close pair finding when rigid bodies are involved.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27