IMP Reference Guide  2.7.0
The Integrative Modeling Platform
pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 from __future__ import print_function
7 import IMP
8 import IMP.algebra
9 import IMP.em
10 import IMP.pmi
11 import IMP.pmi.tools
12 import IMP.pmi.output
13 import IMP.rmf
14 import RMF
15 import IMP.pmi.analysis
16 from operator import itemgetter
17 from copy import deepcopy
18 from math import log,sqrt
19 import itertools
20 import numpy as np
21 
22 
23 class Alignment(object):
24  """Performs alignment and RMSD calculation for two sets of coordinates
25 
26  The class also takes into account non-equal stoichiometry of the proteins.
27  If this is the case, the protein names of proteins in multiple copies
28  should be specified in the following form:
29  nameA..1, nameA..2 (note two dots).
30  """
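# Usage sketch (illustrative only; protein names and coordinates below are
# made up, and each value is an Nx3 array as expected by the constructor):
#
#   import numpy as np
#   template = {'ProtA..1': np.array([[0., 0., 0.], [1., 0., 0.]]),
#               'ProtA..2': np.array([[5., 0., 0.], [6., 0., 0.]])}
#   query    = {'ProtA..1': np.array([[5., 0., 1.], [6., 0., 1.]]),
#               'ProtA..2': np.array([[0., 0., 1.], [1., 0., 1.]])}
#   ali = Alignment(template, query)
#   best_rmsd = ali.get_rmsd()          # minimum RMSD over copy permutations
#   best_rmsd, tr = ali.align()         # also returns the aligning transformation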
31 
32  def __init__(self, template, query, weights=None):
33  """Constructor.
34  @param query {'p1':coords(L,3), 'p2':coords(L,3)}
35  @param template {'p1':coords(L,3), 'p2':coords(L,3)}
36  @param weights optional weights for each set of coordinates
37  """
38  self.query = query
39  self.template = template
40  self.weights=weights
41 
42  if len(self.query.keys()) != len(self.template.keys()):
43  raise ValueError('''the number of proteins
44  in template and query does not match!''')
45 
46  def permute(self):
47  # get unique protein names for each protein
48  # this is, unfortunately, expecting that names are all 'Molname..X'
49  # where X is different for each copy.
50  self.proteins = sorted(self.query.keys())
51  prots_uniq = [i.split('..')[0] for i in self.proteins]
52 
53  # for each unique name, store list of permutations
54  # e.g. for keys A..1,A..2 store P[A] = [[A..1,A..2],[A..2,A..1]]
55  # then store the product: [[[A1,A2],[B1,B2]],[[A1,A2],[B2,B1]],
56  # [[A2,A1],[B1,B2]],[[A2,A1],[B2,B1]]]
57  P = {}
58  for p in prots_uniq:
59  np = prots_uniq.count(p)
60  copies = [i for i in self.proteins if i.split('..')[0] == p]
61  prmts = list(itertools.permutations(copies, len(copies)))
62  P[p] = prmts
63  self.P = P
64  self.Product = list(itertools.product(*P.values()))
65 
66  def get_rmsd(self):
67 
68  self.permute()
69 
70  template_xyz = []
71  weights = []
72  torder = sum([list(i) for i in self.Product[0]], [])
73  for t in torder:
74  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
75  if self.weights is not None:
76  weights += [i for i in self.weights[t]]
77  #template_xyz = np.array(template_xyz)
78 
79  self.rmsd = 10000000000.
80  for comb in self.Product:
81 
82  order = sum([list(i) for i in comb], [])
83  query_xyz = []
84  for p in order:
85  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
86  #query_xyz = np.array(query_xyz)
87  #if len(template_xyz) != len(query_xyz):
88  # print '''Alignment.get_rmsd: ERROR: the number of coordinates
89  # in template and query does not match!'''
90  # exit()
91 
92  if self.weights is not None:
93  dist=IMP.algebra.get_weighted_rmsd(template_xyz, query_xyz, weights)
94  else:
95  dist=IMP.algebra.get_rmsd(template_xyz, query_xyz)
96  #dist = sqrt(
97  # sum(np.diagonal(cdist(template_xyz, query_xyz) ** 2)) / len(template_xyz))
98  if dist < self.rmsd:
99  self.rmsd = dist
100  return self.rmsd
101 
102  def align(self):
103  from scipy.spatial.distance import cdist
104 
105  self.permute()
106 
107  # create flat coordinate list from template in standard order
108  # then loop through the permutations and try to align and get RMSD
109  # should allow you to try all mappings within each protein
110  template_xyz = []
111  torder = sum([list(i) for i in self.Product[0]], [])
112  for t in torder:
113  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
114  # template_xyz = np.array(template_xyz)
115  self.rmsd, Transformation = 10000000000., ''
116 
117  # then for each permutation, get flat list of coords and get RMSD
118  for comb in self.Product:
119  order = sum([list(i) for i in comb], [])
120  query_xyz = []
121  for p in order:
122  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
123  #query_xyz = np.array(query_xyz)
124 
125  if len(template_xyz) != len(query_xyz):
126  raise ValueError('''the number of coordinates
127  in template and query does not match!''')
128 
129  transformation = IMP.algebra.get_transformation_aligning_first_to_second(
130  query_xyz,
131  template_xyz)
132  query_xyz_tr = [transformation.get_transformed(n)
133  for n in query_xyz]
134 
135  dist = sqrt(
136  sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2)) / len(template_xyz))
137  if dist < self.rmsd:
138  self.rmsd = dist
139  Transformation = transformation
140 
141  # return the transformation
142  return (self.rmsd, Transformation)
143 
144 
145 # TEST for the alignment ###
146 """
147 Proteins = {'a..1':np.array([np.array([-1.,1.])]),
148  'a..2':np.array([np.array([1.,1.,])]),
149  'a..3':np.array([np.array([-2.,1.])]),
150  'b':np.array([np.array([0.,-1.])]),
151  'c..1':np.array([np.array([-1.,-1.])]),
152  'c..2':np.array([np.array([1.,-1.])]),
153  'd':np.array([np.array([0.,0.])]),
154  'e':np.array([np.array([0.,1.])])}
155 
156 Ali = Alignment(Proteins, Proteins)
157 Ali.permute()
158 if Ali.get_rmsd() == 0.0: print 'successful test!'
159 else: print 'ERROR!'; exit()
160 """
161 
162 
163 # ----------------------------------
164 class Violations(object):
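 # Counts violated restraints. The threshold file is parsed as whitespace-separated
 # columns, with the first column taken as the restraint name and the second as its
 # violation threshold (any further columns are ignored).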
165 
166  def __init__(self, filename):
167 
168  self.violation_thresholds = {}
169  self.violation_counts = {}
170 
171  data = open(filename)
172  D = data.readlines()
173  data.close()
174 
175  for d in D:
176  d = d.strip().split()
177  self.violation_thresholds[d[0]] = float(d[1])
178 
179  def get_number_violated_restraints(self, rsts_dict):
180  num_violated = 0
181  for rst in self.violation_thresholds:
182  if rst not in rsts_dict:
183  continue # print rst;
184  if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
185  num_violated += 1
186  if rst not in self.violation_counts:
187  self.violation_counts[rst] = 1
188  else:
189  self.violation_counts[rst] += 1
190  return num_violated
191 
192 
193 # ----------------------------------
194 class Clustering(object):
195  """A class to cluster structures.
196  Uses scipy's cdist function to compute distance matrices
197  and sklearn's KMeans module for the clustering.
198  """
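# Usage sketch (illustrative; `all_frames` and `template_coords` are made-up
# names, with coordinates stored as {protein_name: Nx3 array} dictionaries as
# in the Alignment class above):
#
#   clust = Clustering()
#   clust.set_template(template_coords)        # optional: align before RMSD
#   for frame_label, coords in all_frames.items():
#       clust.fill(frame_label, coords)
#   clust.dist_matrix()                        # pairwise RMSD matrix (MPI-aware)
#   clust.do_cluster(number_of_clusters=3)
#   for label in clust.get_cluster_labels():
#       print(label, clust.get_cluster_label_size(label),
#             clust.get_cluster_label_average_rmsd(label))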
199  def __init__(self,rmsd_weights=None):
200  """Constructor.
201  @param rmsd_weights Flat list of weights for each particle
202  (e.g. when the particles are coarse-grained beads)
203  """
204  try:
205  from mpi4py import MPI
206  self.comm = MPI.COMM_WORLD
207  self.rank = self.comm.Get_rank()
208  self.number_of_processes = self.comm.size
209  except ImportError:
210  self.number_of_processes = 1
211  self.rank = 0
212  self.all_coords = {}
213  self.structure_cluster_ids = None
214  self.tmpl_coords = None
215  self.rmsd_weights=rmsd_weights
216 
217  def set_template(self, part_coords):
218 
219  self.tmpl_coords = part_coords
220 
221  def fill(self, frame, Coords):
222  """Add coordinates for a single model."""
223 
224  self.all_coords[frame] = Coords
225 
226  def dist_matrix(self):
227 
228  self.model_list_names = list(self.all_coords.keys())
229  self.model_indexes = list(range(len(self.model_list_names)))
230  self.model_indexes_dict = dict(
231  list(zip(self.model_list_names, self.model_indexes)))
232  model_indexes_unique_pairs = list(itertools.combinations(self.model_indexes, 2))
233 
234  my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
235  model_indexes_unique_pairs,
236  self.number_of_processes)[self.rank]
237 
238  print("process %s assigned with %s pairs" % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
239 
240  (raw_distance_dict, self.transformation_distance_dict) = self.matrix_calculation(self.all_coords,
241  self.tmpl_coords,
242  my_model_indexes_unique_pairs)
243 
244  if self.number_of_processes > 1:
245  raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
246  raw_distance_dict)
247  pickable_transformations = self.get_pickable_transformation_distance_dict(
248  )
249  pickable_transformations = IMP.pmi.tools.scatter_and_gather(
250  pickable_transformations)
251  self.set_transformation_distance_dict_from_pickable(
252  pickable_transformations)
253 
254  self.raw_distance_matrix = np.zeros(
255  (len(self.model_list_names), len(self.model_list_names)))
256  for item in raw_distance_dict:
257  (f1, f2) = item
258  self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
259  self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
260 
261  def get_dist_matrix(self):
262  return self.raw_distance_matrix
263 
264  def do_cluster(self, number_of_clusters,seed=None):
265  """Run K-means clustering
266  @param number_of_clusters Number of means (clusters) for K-means
267  @param seed the random seed
268  """
269  from sklearn.cluster import KMeans
270  if seed is not None:
271  np.random.seed(seed)
272  try:
273  # check whether we have the right version of sklearn
274  kmeans = KMeans(n_clusters=number_of_clusters)
275  except TypeError:
276  # sklearn older than 0.12
277  kmeans = KMeans(k=number_of_clusters)
278  kmeans.fit_predict(self.raw_distance_matrix)
279 
280  self.structure_cluster_ids = kmeans.labels_
281 
282  def get_pickable_transformation_distance_dict(self):
283  pickable_transformations = {}
284  for label in self.transformation_distance_dict:
285  tr = self.transformation_distance_dict[label]
286  trans = tuple(tr.get_translation())
287  rot = tuple(tr.get_rotation().get_quaternion())
288  pickable_transformations[label] = (rot, trans)
289  return pickable_transformations
290 
291  def set_transformation_distance_dict_from_pickable(
292  self,
293  pickable_transformations):
294  self.transformation_distance_dict = {}
295  for label in pickable_transformations:
296  tr = pickable_transformations[label]
297  trans = IMP.algebra.Vector3D(tr[1])
298  rot = IMP.algebra.Rotation3D(tr[0])
299  self.transformation_distance_dict[
300  label] = IMP.algebra.Transformation3D(rot, trans)
301 
302  def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
303  import pickle
304  outf = open(file_name + ".data", 'wb')
305 
306  # to pickle the transformation dictionary
307  # you have to save the arrays corresponding to
308  # the transformations
309 
310  pickable_transformations = self.get_pickable_transformation_distance_dict(
311  )
312  pickle.dump(
313  (self.structure_cluster_ids,
314  self.model_list_names,
315  pickable_transformations),
316  outf)
317 
318  np.save(file_name + ".npy", self.raw_distance_matrix)
319 
320  def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
321  import pickle
322 
323  inputf = open(file_name + ".data", 'rb')
324  (self.structure_cluster_ids, self.model_list_names,
325  pickable_transformations) = pickle.load(inputf)
326  inputf.close()
327 
328  self.raw_distance_matrix = np.load(file_name + ".npy")
329 
330  self.set_transformation_distance_dict_from_pickable(
331  pickable_transformations)
332  self.model_indexes = list(range(len(self.model_list_names)))
333  self.model_indexes_dict = dict(
334  list(zip(self.model_list_names, self.model_indexes)))
335 
336  def plot_matrix(self, figurename="clustermatrix.pdf"):
337  import matplotlib as mpl
338  mpl.use('Agg')
339  import matplotlib.pylab as pl
340  from scipy.cluster import hierarchy as hrc
341 
342  fig = pl.figure(figsize=(10,8))
343  ax = fig.add_subplot(212)
344  dendrogram = hrc.dendrogram(
345  hrc.linkage(self.raw_distance_matrix),
346  color_threshold=7,
347  no_labels=True)
348  leaves_order = dendrogram['leaves']
349  ax.set_xlabel('Model')
350  ax.set_ylabel('RMSD [Angstroms]')
351 
352  ax2 = fig.add_subplot(221)
353  cax = ax2.imshow(
354  self.raw_distance_matrix[leaves_order,
355  :][:,
356  leaves_order],
357  interpolation='nearest')
358  cb = fig.colorbar(cax)
359  cb.set_label('RMSD [Angstroms]')
360  ax2.set_xlabel('Model')
361  ax2.set_ylabel('Model')
362 
363  pl.savefig(figurename, dpi=300)
364  pl.close(fig)
365 
366  def get_model_index_from_name(self, name):
367  return self.model_indexes_dict[name]
368 
369  def get_cluster_labels(self):
370  # return the list of unique cluster labels
371  return list(set(self.structure_cluster_ids))
372 
373  def get_number_of_clusters(self):
374  return len(self.get_cluster_labels())
375 
376  def get_cluster_label_indexes(self, label):
377  return (
378  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
379  )
380 
381  def get_cluster_label_names(self, label):
382  return (
383  [self.model_list_names[i]
384  for i in self.get_cluster_label_indexes(label)]
385  )
386 
387  def get_cluster_label_average_rmsd(self, label):
388 
389  indexes = self.get_cluster_label_indexes(label)
390 
391  if len(indexes) > 1:
392  sub_distance_matrix = self.raw_distance_matrix[
393  indexes, :][:, indexes]
394  average_rmsd = np.sum(sub_distance_matrix) / \
395  (len(sub_distance_matrix)
396  ** 2 - len(sub_distance_matrix))
397  else:
398  average_rmsd = 0.0
399  return average_rmsd
400 
401  def get_cluster_label_size(self, label):
402  return len(self.get_cluster_label_indexes(label))
403 
404  def get_transformation_to_first_member(
405  self,
406  cluster_label,
407  structure_index):
408  reference = self.get_cluster_label_indexes(cluster_label)[0]
409  return self.transformation_distance_dict[(reference, structure_index)]
410 
411  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
412 
413  model_list_names = list(all_coords.keys())
414  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
415  raw_distance_dict = {}
416  transformation_distance_dict = {}
417  if template_coords is None:
418  do_alignment = False
419  else:
420  do_alignment = True
421  alignment_template_protein_names = list(template_coords.keys())
422 
423  for (f1, f2) in list_of_pairs:
424 
425  if not do_alignment:
426  # here we only get the rmsd,
427  # we need that for instance when you want to cluster conformations
428  # globally, e.g. when the EM map is the reference
430 
431  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
432  for pr in rmsd_protein_names])
433  coords_f2 = {}
434  for pr in rmsd_protein_names:
435  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
436 
437  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
438  rmsd = Ali.get_rmsd()
439 
440  elif do_alignment:
441  # here we actually align the conformations first
442  # and then calculate the rmsd. We need that when the
443  # protein(s) are the reference
444  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
445  for pr in alignment_template_protein_names])
446  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
447  for pr in alignment_template_protein_names])
448 
449  Ali = Alignment(coords_f1, coords_f2)
450  template_rmsd, transformation = Ali.align()
451 
452  # here we calculate the rmsd
453  # we will align two models based on the number of subunits provided
454  # and transform the coordinates of model 2 onto model 1
455  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
456  for pr in rmsd_protein_names])
457  coords_f2 = {}
458  for pr in rmsd_protein_names:
459  coords_f2[pr] = [transformation.get_transformed(
460  i) for i in all_coords[model_list_names[f2]][pr]]
461 
462  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
463  rmsd = Ali.get_rmsd()
464 
465  raw_distance_dict[(f1, f2)] = rmsd
466  raw_distance_dict[(f2, f1)] = rmsd
467  transformation_distance_dict[(f1, f2)] = transformation
468  transformation_distance_dict[(f2, f1)] = transformation
469 
470  return raw_distance_dict, transformation_distance_dict
471 
472 
473 class Precision(object):
474  """A class to evaluate the precision of an ensemble.
475 
476  Also can evaluate the cross-precision of multiple ensembles.
477  Supports MPI for coordinate reading.
478  Recommended procedure:
479  -# initialize object and pass the selection for evaluating precision
480  -# call add_structures() to read in the data (specify group name)
481  -# call get_precision() to evaluate inter/intra precision
482  -# call get_rmsf() to evaluate within-group fluctuations
483  """
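# Usage sketch (illustrative; the file names, frame indices and the selection
# dictionary are made up):
#
#   model = IMP.Model()
#   pr = Precision(model, resolution=1,
#                  selection_dictionary={"Sub1": ["Sub1"]})
#   pr.add_structures([("cluster.0/0.rmf3", 0), ("cluster.0/1.rmf3", 0)],
#                     structure_set_name="cluster.0")
#   pr.get_precision("cluster.0", "cluster.0", outfile="precision.dat")
#   pr.get_rmsf("cluster.0", outdir="./")
#   pr.set_reference_structure("reference.rmf3", 0)
#   pr.get_average_distance_wrt_reference_structure("cluster.0")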
484  def __init__(self,model,
485  resolution=1,
486  selection_dictionary={}):
487  """Constructor.
488  @param model The IMP Model
489  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
490  part of the hier name)
491  @param selection_dictionary Dictionary where keys are names for
492  selections and values are selection tuples for scoring
493  precision. "All" is automatically made as well
494  """
495  try:
496  from mpi4py import MPI
497  self.comm = MPI.COMM_WORLD
498  self.rank = self.comm.Get_rank()
499  self.number_of_processes = self.comm.size
500  except ImportError:
501  self.number_of_processes = 1
502  self.rank = 0
503 
504  self.styles = ['pairwise_rmsd','pairwise_drmsd_k','pairwise_drmsd_Q',
505  'pairwise_drms_k','drmsd_from_center']
506  self.style = 'pairwise_drmsd_k'
507  self.structures_dictionary = {}
508  self.reference_structures_dictionary = {}
509  self.prots = []
510  self.protein_names = None
511  self.len_particles_resolution_one = None
512  self.model = model
513  self.rmf_names_frames = {}
514  self.reference_rmf_names_frames = None
515  self.reference_structure = None
516  self.reference_prot = None
517  self.selection_dictionary = selection_dictionary
518  self.threshold = 40.0
519  self.residue_particle_index_map = None
520  self.prots = None
521  if resolution in [1,10]:
522  self.resolution = resolution
523  else:
524  raise KeyError("Currently only allow resolution 1 or 10")
525 
526  def _get_structure(self,rmf_frame_index,rmf_name):
527  """Read an RMF file and return the particles"""
528  rh = RMF.open_rmf_file_read_only(rmf_name)
529  if not self.prots:
530  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
531  self.prots = IMP.rmf.create_hierarchies(rh, self.model)
532  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
533  else:
534  print("linking coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
535  IMP.rmf.link_hierarchies(rh, self.prots)
536  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
537  del rh
538 
539  if self.resolution==1:
540  particle_dict = get_particles_at_resolution_one(self.prots[0])
541  elif self.resolution==10:
542  particle_dict = get_particles_at_resolution_ten(self.prots[0])
543 
544  protein_names = list(particle_dict.keys())
545  particles_resolution_one = []
546  for k in particle_dict:
547  particles_resolution_one += (particle_dict[k])
548 
549  if self.protein_names==None:
550  self.protein_names = protein_names
551  else:
552  if self.protein_names!=protein_names:
553  print("Error: the protein names of the new coordinate set are not compatible with the previous one")
554 
555  if self.len_particles_resolution_one==None:
556  self.len_particles_resolution_one = len(particles_resolution_one)
557  else:
558  if self.len_particles_resolution_one!=len(particles_resolution_one):
559  raise ValueError("the new coordinate set is not compatible with the previous one")
560 
561  return particles_resolution_one
562 
563  def add_structure(self,
564  rmf_name,
565  rmf_frame_index,
566  structure_set_name,
567  setup_index_map=False):
568  """ Read a structure into the ensemble and store (as coordinates).
569  @param rmf_name The name of the RMF file
570  @param rmf_frame_index The frame to read
571  @param structure_set_name Name for the set that includes this structure
572  (e.g. "cluster 1")
573  @param setup_index_map if requested, set up a dictionary to help
574  find residue indexes
575  """
576 
577  # decide where to put this structure
578  if structure_set_name in self.structures_dictionary:
579  cdict = self.structures_dictionary[structure_set_name]
580  rmflist = self.rmf_names_frames[structure_set_name]
581  else:
582  self.structures_dictionary[structure_set_name]={}
583  self.rmf_names_frames[structure_set_name]=[]
584  cdict = self.structures_dictionary[structure_set_name]
585  rmflist = self.rmf_names_frames[structure_set_name]
586 
587  # read the particles
588  try:
589  particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
590  except ValueError:
591  print("something is wrong with the RMF file")
592  return 0
593  self.selection_dictionary.update({"All":self.protein_names})
594 
595  for selection_name in self.selection_dictionary:
596  selection_tuple = self.selection_dictionary[selection_name]
597  coords = self._select_coordinates(selection_tuple,particles_resolution_one,self.prots[0])
598  if selection_name not in cdict:
599  cdict[selection_name] = [coords]
600  else:
601  cdict[selection_name].append(coords)
602 
603  rmflist.append((rmf_name,rmf_frame_index))
604 
605  # if requested, set up a dictionary to help find residue indexes
606  if setup_index_map:
607  self.residue_particle_index_map = {}
608  for prot_name in self.protein_names:
609  self.residue_particle_index_map[prot_name] = \
610  self._get_residue_particle_index_map(
611  prot_name,
612  particles_resolution_one,self.prots[0])
613 
614  def add_structures(self,
615  rmf_name_frame_tuples,
616  structure_set_name):
617  """Read a list of RMF files; supports parallel reading via MPI.
618  @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
619  @param structure_set_name Name this set of structures (e.g. "cluster.1")
620  """
621 
622  # split up the requested list to read in parallel
623  my_rmf_name_frame_tuples=IMP.pmi.tools.chunk_list_into_segments(
624  rmf_name_frame_tuples,self.number_of_processes)[self.rank]
625  for nfr,tup in enumerate(my_rmf_name_frame_tuples):
626  rmf_name=tup[0]
627  rmf_frame_index=tup[1]
628  # the first frame stores the map between residues and particles
629  if self.residue_particle_index_map is None:
630  setup_index_map = True
631  else:
632  setup_index_map = False
633  self.add_structure(rmf_name,
634  rmf_frame_index,
635  structure_set_name,
636  setup_index_map)
637 
638  # synchronize the structures
639  if self.number_of_processes > 1:
640  self.rmf_names_frames=IMP.pmi.tools.scatter_and_gather(self.rmf_names_frames)
641  if self.rank != 0:
642  self.comm.send(self.structures_dictionary, dest=0, tag=11)
643  elif self.rank == 0:
644  for i in range(1, self.number_of_processes):
645  data_tmp = self.comm.recv(source=i, tag=11)
646  for key in self.structures_dictionary:
647  self.structures_dictionary[key].update(data_tmp[key])
648  for i in range(1, self.number_of_processes):
649  self.comm.send(self.structures_dictionary, dest=i, tag=11)
650  if self.rank != 0:
651  self.structures_dictionary = self.comm.recv(source=0, tag=11)
652 
653  def _get_residue_particle_index_map(self,prot_name,structure,hier):
654  # Creates map from all particles to residue numbers
655  residue_particle_index_map = []
656  if IMP.pmi.get_is_canonical(hier):
657  s = IMP.atom.Selection(hier,molecules=[prot_name],
658  resolution=1)
659  else:
660  s = IMP.atom.Selection(hier,molecules=[prot_name])
661  all_selected_particles = s.get_selected_particles()
662  intersection = list(set(all_selected_particles) & set(structure))
663  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
664  for p in sorted_intersection:
665  residue_particle_index_map.append(IMP.pmi.tools.get_residue_indexes(p))
666  return residue_particle_index_map
667 
668 
669  def _select_coordinates(self,tuple_selections,structure,prot):
670  selected_coordinates=[]
671  for t in tuple_selections:
672  if type(t)==tuple and len(t)==3:
673  if IMP.pmi.get_is_canonical(prot):
674  s = IMP.atom.Selection(prot,molecules=[t[2]],residue_indexes=range(t[0],t[1]+1),
675  resolution=1)
676  else:
677  s = IMP.atom.Selection(prot,molecules=[t[2]],residue_indexes=range(t[0],t[1]+1))
678  all_selected_particles = s.get_selected_particles()
679  intersection = list(set(all_selected_particles) & set(structure))
680  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
681  cc = [tuple(IMP.core.XYZ(p).get_coordinates()) for p in sorted_intersection]
682  selected_coordinates += cc
683  elif type(t)==str:
684  if IMP.pmi.get_is_canonical(prot):
685  s = IMP.atom.Selection(prot,molecules=[t],resolution=1)
686  else:
687  s = IMP.atom.Selection(prot,molecules=[t])
688  all_selected_particles = s.get_selected_particles()
689  intersection = list(set(all_selected_particles) & set(structure))
690  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
691  cc = [tuple(IMP.core.XYZ(p).get_coordinates()) for p in sorted_intersection]
692  selected_coordinates += cc
693  else:
694  raise ValueError("Selection error")
695  return selected_coordinates
696 
697  def set_threshold(self,threshold):
698  self.threshold = threshold
699 
700  def _get_distance(self,
701  structure_set_name1,
702  structure_set_name2,
703  selection_name,
704  index1,
705  index2):
706  """ Compute distance between structures with various metrics """
707  c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
708  c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
709  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
710  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
711 
712  if self.style=='pairwise_drmsd_k':
713  distance=IMP.atom.get_drmsd(coordinates1,coordinates2)
714  if self.style=='pairwise_drms_k':
715  distance=IMP.atom.get_drms(coordinates1,coordinates2)
716  if self.style=='pairwise_drmsd_Q':
717  distance=IMP.atom.get_drmsd_Q(coordinates1,coordinates2,self.threshold)
718 
719  if self.style=='pairwise_rmsd':
720  distance=IMP.algebra.get_rmsd(coordinates1,coordinates2)
721  return distance
722 
723  def _get_particle_distances(self,structure_set_name1,structure_set_name2,
724  selection_name,index1,index2):
725  c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
726  c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
727 
728  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
729  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
730 
731  distances=[np.linalg.norm(a-b) for (a,b) in zip(coordinates1,coordinates2)]
732 
733  return distances
734 
735  def get_precision(self,
736  structure_set_name1,
737  structure_set_name2,
738  outfile=None,
739  skip=1,
740  selection_keywords=None):
741  """ Evaluate the precision of two named structure groups. Supports MPI.
742  When structure_set_name1 is different from structure_set_name2,
743  this evaluates the cross-precision (average pairwise distances).
744  @param outfile Name of the precision output file
745  @param structure_set_name1 string name of the first structure set
746  @param structure_set_name2 string name of the second structure set
747  @param skip Analyze every (skip)-th structure for the distance matrix calculation
748  @param selection_keywords Specify the selection name you want to calculate on.
749  By default this is computed for everything you provided in the constructor,
750  plus all the subunits together.
751  """
752  if selection_keywords is None:
753  sel_keys = list(self.selection_dictionary.keys())
754  else:
755  for k in selection_keywords:
756  if k not in self.selection_dictionary:
757  raise KeyError("you are trying to find named selection " \
758  + k + " which was not requested in the constructor")
759  sel_keys = selection_keywords
760 
761  if outfile is not None:
762  of = open(outfile,"w")
763  centroid_index = 0
764  for selection_name in sel_keys:
765  number_of_structures_1 = len(self.structures_dictionary[structure_set_name1][selection_name])
766  number_of_structures_2 = len(self.structures_dictionary[structure_set_name2][selection_name])
767  distances={}
768  structure_pointers_1 = list(range(0,number_of_structures_1,skip))
769  structure_pointers_2 = list(range(0,number_of_structures_2,skip))
770  pair_combination_list = list(itertools.product(structure_pointers_1,structure_pointers_2))
771  if len(pair_combination_list)==0:
772  raise ValueError("no structure selected. Check the skip parameter.")
773 
774  # compute pairwise distances in parallel
775  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
776  pair_combination_list,self.number_of_processes)[self.rank]
777  my_length = len(my_pair_combination_list)
778  for n,pair in enumerate(my_pair_combination_list):
779  progression = int(float(n)/my_length*100.0)
780  distances[pair] = self._get_distance(structure_set_name1,structure_set_name2,
781  selection_name,pair[0],pair[1])
782  if self.number_of_processes > 1:
783  distances = IMP.pmi.tools.scatter_and_gather(distances)
784 
785  # Finally compute distance to centroid
786  if self.rank == 0:
787  if structure_set_name1==structure_set_name2:
788  structure_pointers = structure_pointers_1
789  number_of_structures = number_of_structures_1
790 
791  # calculate the distance from the first centroid
792  # and determine the centroid
793  distance = 0.0
794  distances_to_structure = {}
795  distances_to_structure_normalization = {}
796 
797  for n in structure_pointers:
798  distances_to_structure[n] = 0.0
799  distances_to_structure_normalization[n]=0
800 
801  for k in distances:
802  distance += distances[k]
803  distances_to_structure[k[0]] += distances[k]
804  distances_to_structure[k[1]] += distances[k]
805  distances_to_structure_normalization[k[0]] += 1
806  distances_to_structure_normalization[k[1]] += 1
807 
808  for n in structure_pointers:
809  distances_to_structure[n] = distances_to_structure[n]/distances_to_structure_normalization[n]
810 
811  min_distance = min([distances_to_structure[n] for n in distances_to_structure])
812  centroid_index = [k for k, v in distances_to_structure.items() if v == min_distance][0]
813  centroid_rmf_name = self.rmf_names_frames[structure_set_name1][centroid_index]
814 
815  centroid_distance = 0.0
816  distance_list = []
817  for n in range(number_of_structures):
818  dist = self._get_distance(structure_set_name1,structure_set_name1,
819  selection_name,centroid_index,n)
820  centroid_distance += dist
821  distance_list.append(dist)
822 
823  #pairwise_distance=distance/len(distances.keys())
824  centroid_distance /= number_of_structures
825  #average_centroid_distance=sum(distances_to_structure)/len(distances_to_structure)
826  if outfile is not None:
827  of.write(str(selection_name)+" "+structure_set_name1+
828  " average centroid distance "+str(centroid_distance)+"\n")
829  of.write(str(selection_name)+" "+structure_set_name1+
830  " centroid index "+str(centroid_index)+"\n")
831  of.write(str(selection_name)+" "+structure_set_name1+
832  " centroid rmf name "+str(centroid_rmf_name)+"\n")
833  of.write(str(selection_name)+" "+structure_set_name1+
834  " median centroid distance "+str(np.median(distance_list))+"\n")
835 
836  average_pairwise_distances=sum(distances.values())/len(list(distances.values()))
837  if outfile is not None:
838  of.write(str(selection_name)+" "+structure_set_name1+" "+structure_set_name2+
839  " average pairwise distance "+str(average_pairwise_distances)+"\n")
840  if outfile is not None:
841  of.close()
842  return centroid_index
843 
844  def get_rmsf(self,
845  structure_set_name,
846  outdir="./",
847  skip=1,
848  set_plot_yaxis_range=None):
849  """ Calculate the residue root mean square fluctuations (RMSF).
850  Automatically writes a data file and a PDF plot.
851  @param structure_set_name Which structure set to calculate RMSF for
852  @param outdir Where to write the files
853  @param skip Skip this number of structures
854  @param set_plot_yaxis_range In case you need to change the y-axis range of the plot
855  """
856  # get the centroid structure for the whole complex
857  centroid_index = self.get_precision(structure_set_name,
858  structure_set_name,
859  outfile=None,
860  skip=skip)
861  if self.rank==0:
862  for sel_name in self.protein_names:
863  self.selection_dictionary.update({sel_name:[sel_name]})
864  try:
865  number_of_structures = len(self.structures_dictionary[structure_set_name][sel_name])
866  except KeyError:
867  # that protein was not included in the selection
868  continue
869  rpim = self.residue_particle_index_map[sel_name]
870  outfile = outdir+"/rmsf."+sel_name+".dat"
871  of = open(outfile,"w")
872  residue_distances = {}
873  residue_nblock = {}
874  for index in range(number_of_structures):
875  distances = self._get_particle_distances(structure_set_name,
876  structure_set_name,
877  sel_name,
878  centroid_index,index)
879  for nblock,block in enumerate(rpim):
880  for residue_number in block:
881  residue_nblock[residue_number] = nblock
882  if residue_number not in residue_distances:
883  residue_distances[residue_number] = [distances[nblock]]
884  else:
885  residue_distances[residue_number].append(distances[nblock])
886 
887  residues = []
888  rmsfs = []
889  for rn in residue_distances:
890  residues.append(rn)
891  rmsf = np.std(residue_distances[rn])
892  rmsfs.append(rmsf)
893  of.write(str(rn)+" "+str(residue_nblock[rn])+" "+str(rmsf)+"\n")
894 
895  IMP.pmi.output.plot_xy_data(residues,rmsfs,title=sel_name,
896  out_fn=outdir+"/rmsf."+sel_name,display=False,
897  set_plot_yaxis_range=set_plot_yaxis_range,
898  xlabel='Residue Number',ylabel='Standard error')
899  of.close()
900 
901 
902  def set_reference_structure(self,rmf_name,rmf_frame_index):
903  """Read in a structure used for reference computation.
904  Needed before calling get_average_distance_wrt_reference_structure()
905  @param rmf_name The RMF file to read the reference
906  @param rmf_frame_index The index in that file
907  """
908  particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
909  self.reference_rmf_names_frames = (rmf_name,rmf_frame_index)
910 
911  for selection_name in self.selection_dictionary:
912  selection_tuple = self.selection_dictionary[selection_name]
913  coords = self._select_coordinates(selection_tuple,
914  particles_resolution_one,self.prots[0])
915  self.reference_structures_dictionary[selection_name] = coords
916 
917  def get_rmsd_wrt_reference_structure_with_alignment(self,structure_set_name,alignment_selection_key):
918  """First align then calculate RMSD
919  @param structure_set_name: the name of the structure set
920  @param alignment_selection_key: the key in self.selection_dictionary containing the selection tuples used to make the alignment
921  @return: for each structure in the structure set, returns the rmsd
922  """
923  if self.reference_structures_dictionary=={}:
924  print("Cannot compute until you set a reference structure")
925  return
926 
927  align_reference_coordinates = self.reference_structures_dictionary[alignment_selection_key]
928  align_coordinates = self.structures_dictionary[structure_set_name][alignment_selection_key]
929  transformations = []
930  for c in align_coordinates:
931  Ali = IMP.pmi.analysis.Alignment({"All":align_reference_coordinates}, {"All":c})
932  transformation = Ali.align()[1]
933  transformations.append(transformation)
934  for selection_name in self.selection_dictionary:
935  reference_coordinates = self.reference_structures_dictionary[selection_name]
936  coordinates2 = [IMP.algebra.Vector3D(c) for c in reference_coordinates]
937  distances = []
938  for n,sc in enumerate(self.structures_dictionary[structure_set_name][selection_name]):
939  coordinates1 = [transformations[n].get_transformed(IMP.algebra.Vector3D(c)) for c in sc]
940  distance = IMP.algebra.get_rmsd(coordinates1,coordinates2)
941  distances.append(distance)
942  print(selection_name,"average rmsd",sum(distances)/len(distances),"median",self._get_median(distances),"minimum distance",min(distances))
943 
944  def _get_median(self,list_of_values):
945  return np.median(np.array(list_of_values))
946 
947  def get_average_distance_wrt_reference_structure(self,structure_set_name):
948  """Compare the structure set to the reference structure.
949  @param structure_set_name The structure set to compute this on
950  @note First call set_reference_structure()
951  """
952  ret = {}
953  if self.reference_structures_dictionary=={}:
954  print("Cannot compute until you set a reference structure")
955  return
956  for selection_name in self.selection_dictionary:
957  reference_coordinates = self.reference_structures_dictionary[selection_name]
958  coordinates2 = [IMP.algebra.Vector3D(c) for c in reference_coordinates]
959  distances = []
960  for sc in self.structures_dictionary[structure_set_name][selection_name]:
961  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
962  if self.style=='pairwise_drmsd_k':
963  distance = IMP.atom.get_drmsd(coordinates1,coordinates2)
964  if self.style=='pairwise_drms_k':
965  distance = IMP.atom.get_drms(coordinates1,coordinates2)
966  if self.style=='pairwise_drmsd_Q':
967  distance = IMP.atom.get_drmsd_Q(coordinates1,coordinates2,self.threshold)
968  if self.style=='pairwise_rmsd':
969  distance = IMP.algebra.get_rmsd(coordinates1,coordinates2)
970  distances.append(distance)
971 
972  print(selection_name,"average distance",sum(distances)/len(distances),"minimum distance",min(distances),'nframes',len(distances))
973  ret[selection_name] = {'average_distance':sum(distances)/len(distances),'minimum_distance':min(distances)}
974  return ret
975 
976  def get_coordinates(self):
977  pass
978 
979  def set_precision_style(self, style):
980  if style in self.styles:
981  self.style=style
982  else:
983  raise ValueError("No such style")
984 
985 class GetRMSD(object):
986  """This object can be added to output_objects (for example, in the ReplicaExchange0 macro).
987  It loads a frame from an RMF file and computes the RMSD of a hierarchy with respect to that reference structure.
988  The output can be parsed from the stat.*.out files.
989  """
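 # Usage sketch (illustrative; the root hierarchy, file name, indices and label
 # are made up):
 #
 #   rmsd_getter = GetRMSD(hier=root_hier,
 #                         rmf_file_name="reference.rmf3",
 #                         rmf_frame_index=0,
 #                         rmf_state_index=0,
 #                         label="RMSD_to_reference")
 #   output_objects.append(rmsd_getter)
 #   # each stat file entry will then report the label with the computed RMSD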
990  import math
991 
992  def __init__(self, hier, rmf_file_name, rmf_frame_index, rmf_state_index, label):
993  """Constructor.
994  @param hier hierarchy of the structure
995  @param rmf_file_name path to the rmf file containing the reference structure
996  @param rmf_frame_index the index of the frame containing the reference structure in the rmf file
997  @param rmf_state_index the index of the state containing the reference structure in the rmf file
998  @param label label that will be used when parsing the stat file
999  """
1000 
1001  rmf_fh = RMF.open_rmf_file_read_only(rmf_file_name)
1002  hh = IMP.rmf.create_hierarchies(rmf_fh, hier.get_model())
1003  self.rmf_state = hh[0].get_children()[rmf_state_index]
1004  self.current_state = hier.get_children()[rmf_state_index]
1005  IMP.rmf.load_frame(rmf_fh, RMF.FrameID(rmf_frame_index))
1006  self.current_moldict = {}
1007  self.rmf_moldict = {}
1008  for mol in self.rmf_state.get_children():
1009  name = mol.get_name()
1010  if name in self.rmf_moldict:
1011  self.rmf_moldict[name].append(mol)
1012  else:
1013  self.rmf_moldict[name] = [mol]
1014  for mol in self.current_state.get_children():
1015  name = mol.get_name()
1016  if name in self.current_moldict:
1017  self.current_moldict[name].append(mol)
1018  else:
1019  self.current_moldict[name] = [mol]
1020  self.label = label
1021 
1022  def get_output(self):
1023  total_rmsd = 0
1024  total_N = 0
1025  for molname, rmf_mols in self.rmf_moldict.items():
1026  sel_rmf = IMP.atom.Selection(
1027  rmf_mols, representation_type=IMP.atom.BALLS)
1028  N = len(sel_rmf.get_selected_particle_indexes())
1029  total_N += N
1030  rmsd = []
1031  for current_mols in itertools.permutations(self.current_moldict[molname]):
1032  sel_current = IMP.atom.Selection(
1033  current_mols, representation_type=IMP.atom.BALLS)
1034  rmsd.append(IMP.atom.get_rmsd(sel_rmf, sel_current))
1035  m = min(rmsd)
1036  total_rmsd += m * m * N
1037  return {self.label: "%e" % (self.math.sqrt(total_rmsd / total_N))}
1038 
1039 
1040 class GetModelDensity(object):
1041  """Compute mean density maps from structures.
1042 
1043  Keeps a dictionary of density maps,
1044  keyed by the custom range names. When you call add_subunits_density, it adds
1045  particle coordinates to the existing density maps.
1046  """
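# Usage sketch (illustrative; component names, selections, the hierarchies and
# the output directory are made up):
#
#   dens = GetModelDensity(custom_ranges={"kin28": ["kin28"],
#                                         "tfb3_n": [(1, 142, "tfb3")]},
#                          resolution=20.0, voxel=5.0)
#   for hier in sampled_hierarchies:     # e.g. hierarchies loaded from RMF frames
#       dens.add_subunits_density(hier)
#   dens.write_mrc(path="./densities")   # one averaged .mrc file per key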
1047 
1048  def __init__(self, custom_ranges, representation=None, resolution=20.0, voxel=5.0):
1049  """Constructor.
1050  @param custom_ranges Required. It's a dictionary, keys are the
1051  density component names, values are selection tuples
1052  e.g. {'kin28':[['kin28',1,-1]],
1053  'density_name_1' :[('ccl1')],
1054  'density_name_2' :[(1,142,'tfb3d1'),
1055  (143,700,'tfb3d2')]}
1056  @param representation PMI representation, for doing selections.
1057  Not needed if you only pass hierarchies
1058  @param resolution The MRC resolution of the output map (in Angstrom unit)
1059  @param voxel The voxel size for the output map (lower is slower)
1060  """
1061 
1062  self.representation = representation
1063  self.MRCresolution = resolution
1064  self.voxel = voxel
1065  self.densities = {}
1066  self.count_models = 0.0
1067  self.custom_ranges = custom_ranges
1068 
1069  def add_subunits_density(self, hierarchy=None):
1070  """Add a frame to the densities.
1071  @param hierarchy Optionally pass a hierarchy to read coordinates from.
1072  If not passed, coordinates are read from the representation.
1073  """
1074  self.count_models += 1.0
1075 
1076  if hierarchy:
1077  part_dict = get_particles_at_resolution_one(hierarchy)
1078  all_particles_by_resolution = []
1079  for name in part_dict:
1080  all_particles_by_resolution += part_dict[name]
1081 
1082  for density_name in self.custom_ranges:
1083  parts = []
1084  if hierarchy:
1085  all_particles_by_segments = []
1086 
1087  for seg in self.custom_ranges[density_name]:
1088  if not hierarchy:
1089  # when you have a IMP.pmi.representation.Representation class
1090  parts += IMP.pmi.tools.select_by_tuple(self.representation,
1091  seg, resolution=1, name_is_ambiguous=False)
1092  else:
1093  # else, when you have a hierarchy, but not a representation
1094  if not IMP.pmi.get_is_canonical(hierarchy):
1095  for h in hierarchy.get_children():
1096  if not IMP.atom.Molecule.get_is_setup(h):
1097  IMP.atom.Molecule.setup_particle(h.get_particle())
1098 
1099  if type(seg) == str:
1100  s = IMP.atom.Selection(hierarchy,molecule=seg)
1101  elif type(seg) == tuple:
1102  s = IMP.atom.Selection(
1103  hierarchy, molecule=seg[2],residue_indexes=range(seg[0], seg[1] + 1))
1104  else:
1105  raise Exception('could not understand selection tuple '+str(seg))
1106 
1107  all_particles_by_segments += s.get_selected_particles()
1108  if hierarchy:
1109  if IMP.pmi.get_is_canonical(hierarchy):
1110  parts = all_particles_by_segments
1111  else:
1112  parts = list(
1113  set(all_particles_by_segments) & set(all_particles_by_resolution))
1114  self._create_density_from_particles(parts, density_name)
1115 
1116  def normalize_density(self):
1117  pass
1118 
1119  def _create_density_from_particles(self, ps, name,
1120  kernel_type='GAUSSIAN'):
1121  '''Internal function for adding to densities.
1122  pass XYZR particles with mass and create a density from them.
1123  kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.'''
1124  kd = {
1125  'GAUSSIAN': IMP.em.GAUSSIAN,
1126  'BINARIZED_SPHERE': IMP.em.BINARIZED_SPHERE,
1127  'SPHERE': IMP.em.SPHERE}
1128 
1129  dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
1130  dmap.calcRMS()
1131  dmap.set_was_used(True)
1132  if name not in self.densities:
1133  self.densities[name] = dmap
1134  else:
1135  bbox1 = IMP.em.get_bounding_box(self.densities[name])
1136  bbox2 = IMP.em.get_bounding_box(dmap)
1137  bbox1 += bbox2
1138  dmap3 = IMP.em.create_density_map(bbox1,self.voxel)
1139  dmap3.set_was_used(True)
1140  dmap3.add(dmap)
1141  dmap3.add(self.densities[name])
1142  self.densities[name] = dmap3
1143 
1144  def get_density_keys(self):
1145  return list(self.densities.keys())
1146 
1147  def get_density(self,name):
1148  """Get the current density for some component name"""
1149  if name not in self.densities:
1150  return None
1151  else:
1152  return self.densities[name]
1153 
1154  def write_mrc(self, path="./"):
1155  for density_name in self.densities:
1156  self.densities[density_name].multiply(1. / self.count_models)
1157  IMP.em.write_map(
1158  self.densities[density_name],
1159  path + "/" + density_name + ".mrc",
1160  IMP.em.MRCReaderWriter())
1161 
1162 
1163 class GetContactMap(object):
1164 
1165  def __init__(self, distance=15.):
1166  self.distance = distance
1167  self.contactmap = ''
1168  self.namelist = []
1169  self.xlinks = 0
1170  self.XL = {}
1171  self.expanded = {}
1172  self.resmap = {}
1173 
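 # Usage sketch (illustrative; the hierarchy, the stat file and the output name
 # are made up, and the hierarchy is assumed to have the nested layout that
 # get_subunit_coords() expects):
 #
 #   cm = GetContactMap(distance=15.)
 #   cm.set_prot(prot_hierarchy)
 #   for frame in range(number_of_frames):
 #       # ...load frame `frame` into prot_hierarchy...
 #       cm.get_subunit_coords(frame)
 #   cm.add_xlinks("stat_file.out")       # optional cross-link overlay
 #   cm.dist_matrix(skip_cmap=0, skip_xl=0, outname="contactmap")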
1174  def set_prot(self, prot):
1175  from scipy.spatial.distance import cdist
1176  self.prot = prot
1177  self.protnames = []
1178  coords = []
1179  radii = []
1180  namelist = []
1181 
1182  particles_dictionary = get_particles_at_resolution_one(self.prot)
1183 
1184  for name in particles_dictionary:
1185  residue_indexes = []
1186  for p in particles_dictionary[name]:
1187  print(p.get_name())
1188  residue_indexes += IMP.pmi.tools.get_residue_indexes(p)
1189 
1190  if len(residue_indexes) != 0:
1191  self.protnames.append(name)
1192 
1193  def get_subunit_coords(self, frame, align=0):
1194  from scipy.spatial.distance import cdist
1195  coords = []
1196  radii = []
1197  namelist = []
1198  test, testr = [], []
1199  for part in self.prot.get_children():
1200  SortedSegments = []
1201  print(part)
1202  for chl in part.get_children():
1203  start = IMP.atom.get_leaves(chl)[0]
1204  end = IMP.atom.get_leaves(chl)[-1]
1205 
1206  startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
1207  endres = IMP.atom.Fragment(end).get_residue_indexes()[-1]
1208  SortedSegments.append((chl, startres))
1209  SortedSegments = sorted(SortedSegments, key=itemgetter(1))
1210 
1211  for sgmnt in SortedSegments:
1212  for leaf in IMP.atom.get_leaves(sgmnt[0]):
1213  p = IMP.core.XYZR(leaf)
1214  crd = np.array([p.get_x(), p.get_y(), p.get_z()])
1215 
1216  coords.append(crd)
1217  radii.append(p.get_radius())
1218 
1219  new_name = part.get_name() + '_' + sgmnt[0].get_name() +\
1220  '_' + \
1221  str(IMP.atom.Fragment(leaf)
1222  .get_residue_indexes()[0])
1223  namelist.append(new_name)
1224  self.expanded[new_name] = len(
1225  IMP.atom.Fragment(leaf).get_residue_indexes())
1226  if part.get_name() not in self.resmap:
1227  self.resmap[part.get_name()] = {}
1228  for res in IMP.atom.Fragment(leaf).get_residue_indexes():
1229  self.resmap[part.get_name()][res] = new_name
1230 
1231  coords = np.array(coords)
1232  radii = np.array(radii)
1233  if len(self.namelist) == 0:
1234  self.namelist = namelist
1235  self.contactmap = np.zeros((len(coords), len(coords)))
1236  distances = cdist(coords, coords)
1237  distances = (distances - radii).T - radii
1238  distances = distances <= self.distance
1239  self.contactmap += distances
1240 
1241  def add_xlinks(
1242  self,
1243  filname,
1244  identification_string='ISDCrossLinkMS_Distance_'):
1245  # 'ISDCrossLinkMS_Distance_interrb_6629-State:0-20:RPS30_218:eIF3j-1-1-0.1_None'
1246  self.xlinks = 1
1247  data = open(filname)
1248  D = data.readlines()
1249  data.close()
1250 
1251  for d in D:
1252  if identification_string in d:
1253  d = d.replace(
1254  "_",
1255  " ").replace("-",
1256  " ").replace(":",
1257  " ").split()
1258 
1259  t1, t2 = (d[0], d[1]), (d[1], d[0])
1260  if t1 not in self.XL:
1261  self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
1262  self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
1263  else:
1264  self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
1265  self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1266 
1267  def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
1268  K = self.namelist
1269  M = self.contactmap
1270  C, R = [], []
1271  L = sum(self.expanded.values())
1272  proteins = self.protnames
1273 
1274  # exp new
1275  if skip_cmap == 0:
1276  Matrices = {}
1277  proteins = [p.get_name() for p in self.prot.get_children()]
1278  missing = []
1279  for p1 in range(len(proteins)):
1280  for p2 in range(p1, len(proteins)):
1281  pl1, pl2 = max(
1282  self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
1283  pn1, pn2 = proteins[p1], proteins[p2]
1284  mtr = np.zeros((pl1 + 1, pl2 + 1))
1285  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1286  for i1 in range(1, pl1 + 1):
1287  for i2 in range(1, pl2 + 1):
1288  try:
1289  r1 = K.index(self.resmap[pn1][i1])
1290  r2 = K.index(self.resmap[pn2][i2])
1291  r = M[r1, r2]
1292  mtr[i1 - 1, i2 - 1] = r
1293  except KeyError:
1294  missing.append((pn1, pn2, i1, i2))
1295  pass
1296  Matrices[(pn1, pn2)] = mtr
1297 
1298  # add cross-links
1299  if skip_xl == 0:
1300  if self.XL == {}:
1301  raise ValueError("cross-links were not provided, use add_xlinks function!")
1302  Matrices_xl = {}
1303  missing_xl = []
1304  for p1 in range(len(proteins)):
1305  for p2 in range(p1, len(proteins)):
1306  pl1, pl2 = max(
1307  self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
1308  pn1, pn2 = proteins[p1], proteins[p2]
1309  mtr = np.zeros((pl1 + 1, pl2 + 1))
1310  flg = 0
1311  try:
1312  xls = self.XL[(pn1, pn2)]
1313  except KeyError:
1314  try:
1315  xls = self.XL[(pn2, pn1)]
1316  flg = 1
1317  except KeyError:
1318  flg = 2
1319  if flg == 0:
1320  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1321  for xl1, xl2 in xls:
1322  if xl1 > pl1:
1323  print('X' * 10, xl1, xl2)
1324  xl1 = pl1
1325  if xl2 > pl2:
1326  print('X' * 10, xl1, xl2)
1327  xl2 = pl2
1328  mtr[xl1 - 1, xl2 - 1] = 100
1329  elif flg == 1:
1330  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1331  for xl1, xl2 in xls:
1332  if xl1 > pl1:
1333  print('X' * 10, xl1, xl2)
1334  xl1 = pl1
1335  if xl2 > pl2:
1336  print('X' * 10, xl1, xl2)
1337  xl2 = pl2
1338  mtr[xl2 - 1, xl1 - 1] = 100
1339  else:
1340  print('No cross links between: ', pn1, pn2)
1341  Matrices_xl[(pn1, pn2)] = mtr
1342 
1343  # expand the matrix to individual residues
1344  #NewM = []
1345  # for x1 in xrange(len(K)):
1346  # lst = []
1347  # for x2 in xrange(len(K)):
1348  # lst += [M[x1,x2]]*self.expanded[K[x2]]
1349  # for i in xrange(self.expanded[K[x1]]): NewM.append(np.array(lst))
1350  #NewM = np.array(NewM)
1351 
1352  # make list of protein names and create coordinate lists
1353  C = proteins
1354  # W is the component length list,
1355  # R is the contiguous coordinates list
1356  W, R = [], []
1357  for i, c in enumerate(C):
1358  cl = max(self.resmap[c].keys())
1359  W.append(cl)
1360  if i == 0:
1361  R.append(cl)
1362  else:
1363  R.append(R[-1] + cl)
1364 
1365  # start plotting
1366  if outname:
1367  # Don't require a display
1368  import matplotlib as mpl
1369  mpl.use('Agg')
1370  import matplotlib.pyplot as plt
1371  import matplotlib.gridspec as gridspec
1372  import scipy.sparse as sparse
1373 
1374  f = plt.figure()
1375  gs = gridspec.GridSpec(len(W), len(W),
1376  width_ratios=W,
1377  height_ratios=W)
1378 
1379  cnt = 0
1380  for x1, r1 in enumerate(R):
1381  if x1 == 0:
1382  s1 = 0
1383  else:
1384  s1 = R[x1 - 1]
1385  for x2, r2 in enumerate(R):
1386  if x2 == 0:
1387  s2 = 0
1388  else:
1389  s2 = R[x2 - 1]
1390 
1391  ax = plt.subplot(gs[cnt])
1392  if skip_cmap == 0:
1393  try:
1394  mtr = Matrices[(C[x1], C[x2])]
1395  except KeyError:
1396  mtr = Matrices[(C[x2], C[x1])].T
1397  #cax = ax.imshow(log(NewM[s1:r1,s2:r2] / 1.), interpolation='nearest', vmin=0., vmax=log(NewM.max()))
1398  cax = ax.imshow(
1399  np.log(mtr),
1400  interpolation='nearest',
1401  vmin=0.,
1402  vmax=log(mtr.max()))
1403  ax.set_xticks([])
1404  ax.set_yticks([])
1405  if skip_xl == 0:
1406  try:
1407  mtr = Matrices_xl[(C[x1], C[x2])]
1408  except KeyError:
1409  mtr = Matrices_xl[(C[x2], C[x1])].T
1410  cax = ax.spy(
1411  sparse.csr_matrix(mtr),
1412  markersize=10,
1413  color='white',
1414  linewidth=100,
1415  alpha=0.5)
1416  ax.set_xticks([])
1417  ax.set_yticks([])
1418 
1419  cnt += 1
1420  if x2 == 0:
1421  ax.set_ylabel(C[x1], rotation=90)
1422  if outname:
1423  plt.savefig(outname + ".pdf", dpi=300, transparent=False)
1424  else:
1425  plt.show()
1426 
1427 
1428 # ------------------------------------------------------------------
1429 # a few random tools
1430 
1431 def get_hiers_from_rmf(model, frame_number, rmf_file):
1432  # I have to deprecate this function
1433  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1434 
1435  # load the frame
1436  rh = RMF.open_rmf_file_read_only(rmf_file)
1437 
1438  try:
1439  prots = IMP.rmf.create_hierarchies(rh, model)
1440  except IOError:
1441  print("Unable to open rmf file %s" % (rmf_file))
1442  return None
1443  #IMP.rmf.link_hierarchies(rh, prots)
1444 
1445  try:
1446  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1447  except IOError:
1448  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1449  return None
1450  model.update()
1451  del rh
1452 
1453  return prots
1454 
1455 def link_hiers_to_rmf(model,hiers,frame_number, rmf_file):
1456  print("linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
1457  rh = RMF.open_rmf_file_read_only(rmf_file)
1458  IMP.rmf.link_hierarchies(rh, hiers)
1459  try:
1460  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1461  except:
1462  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1463  return False
1464  model.update()
1465  del rh
1466  return True
1467 
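# Usage sketch for the RMF helper functions in this section (illustrative; the
# file name and frame range are made up): build hierarchies from the first
# frame, then link and reload for the remaining frames.
#
#   model = IMP.Model()
#   hiers = get_hiers_from_rmf(model, 0, "trajectory.rmf3")
#   for frame in range(1, 10):
#       if not link_hiers_to_rmf(model, hiers, frame, "trajectory.rmf3"):
#           break
#       # ...analyze the coordinates loaded for this frame...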
1468 
1469 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
1470  # I have to deprecate this function
1471  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1472 
1473  # load the frame
1474  rh = RMF.open_rmf_file_read_only(rmf_file)
1475 
1476  try:
1477  prots = IMP.rmf.create_hierarchies(rh, model)
1478  rs = IMP.rmf.create_restraints(rh, model)
1479  except:
1480  print("Unable to open rmf file %s" % (rmf_file))
1481  return None,None
1482  try:
1483  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1484  except:
1485  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1486  return None,None
1487  model.update()
1488  del rh
1489 
1490  return prots,rs
1491 
1492 def link_hiers_and_restraints_to_rmf(model,hiers,rs, frame_number, rmf_file):
1493  print("linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
1494  rh = RMF.open_rmf_file_read_only(rmf_file)
1495  IMP.rmf.link_hierarchies(rh, hiers)
1496  IMP.rmf.link_restraints(rh, rs)
1497  try:
1498  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1499  except:
1500  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1501  return False
1502  model.update()
1503  del rh
1504  return True
1505 
1506 
1507 def get_hiers_from_rmf(model, frame_number, rmf_file):
1508  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1509 
1510  # load the frame
1511  rh = RMF.open_rmf_file_read_only(rmf_file)
1512 
1513  try:
1514  prots = IMP.rmf.create_hierarchies(rh, model)
1515  except:
1516  print("Unable to open rmf file %s" % (rmf_file))
1517  prot = None
1518  return prot
1519  #IMP.rmf.link_hierarchies(rh, prots)
1520  try:
1521  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1522  except:
1523  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1524  prots = None
1525  model.update()
1526  del rh
1527  return prots
1528 
1529 
1530 def get_particles_at_resolution_one(prot):
1531  """Get particles at res 1, or any beads, based on the name.
1532  No Representation is needed. This is mainly used when the hierarchy
1533  is read from an RMF file.
1534  @return a dictionary of component names and their particles
1535  \note If the root node is named "System" or is a "State", do proper selection.
1536  """
1537  particle_dict = {}
1538 
1539  # attempt to give good results for PMI2
1540  if IMP.pmi.get_is_canonical(prot):
1541  for mol in IMP.atom.get_by_type(prot,IMP.atom.MOLECULE_TYPE):
1542  sel = IMP.atom.Selection(mol,resolution=1)
1543  particle_dict[mol.get_name()] = sel.get_selected_particles()
1544  else:
1545  allparticles = []
1546  for c in prot.get_children():
1547  name = c.get_name()
1548  particle_dict[name] = IMP.atom.get_leaves(c)
1549  for s in c.get_children():
1550  if "_Res:1" in s.get_name() and "_Res:10" not in s.get_name():
1551  allparticles += IMP.atom.get_leaves(s)
1552  if "Beads" in s.get_name():
1553  allparticles += IMP.atom.get_leaves(s)
1554 
1555  particle_align = []
1556  for name in particle_dict:
1557  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1558  list(set(particle_dict[name]) & set(allparticles)))
1559  return particle_dict
1560 
1561 def get_particles_at_resolution_ten(prot):
1562  """Get particles at res 10, or any beads, based on the name.
1563  No Representation is needed.
1564  This is mainly used when the hierarchy is read from an RMF file.
1565  @return a dictionary of component names and their particles
 1566  \note If the root node is named "System" or is a "State", proper selection is performed.
1567  """
1568  particle_dict = {}
1569  # attempt to give good results for PMI2
1570  if IMP.pmi.get_is_canonical(prot):
1571  for mol in IMP.atom.get_by_type(prot,IMP.atom.MOLECULE_TYPE):
1572  sel = IMP.atom.Selection(mol,resolution=10)
1573  particle_dict[mol.get_name()] = sel.get_selected_particles()
1574  else:
1575  allparticles = []
1576  for c in prot.get_children():
1577  name = c.get_name()
1578  particle_dict[name] = IMP.atom.get_leaves(c)
1579  for s in c.get_children():
1580  if "_Res:10" in s.get_name():
1581  allparticles += IMP.atom.get_leaves(s)
1582  if "Beads" in s.get_name():
1583  allparticles += IMP.atom.get_leaves(s)
1584  particle_align = []
1585  for name in particle_dict:
1586  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1587  list(set(particle_dict[name]) & set(allparticles)))
1588  return particle_dict
1589 
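As an illustration of the two lookup functions above (continuing the hypothetical `hiers` from the earlier sketch), the returned dictionaries map component names to their particles:

# assumes `hiers` was obtained as in the earlier sketch
res1 = IMP.pmi.analysis.get_particles_at_resolution_one(hiers[0])
res10 = IMP.pmi.analysis.get_particles_at_resolution_ten(hiers[0])
for name in res1:
    print(name, len(res1[name]), "particles at resolution 1,",
          len(res10.get(name, [])), "at resolution 10")
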
1590 def select_by_tuple(first_res_last_res_name_tuple):
 1591  first_res = first_res_last_res_name_tuple[0]
 1592  last_res = first_res_last_res_name_tuple[1]
 1593  name = first_res_last_res_name_tuple[2]
1594 
1595 class CrossLinkTable(object):
1596  """Visualization of crosslinks"""
1597  def __init__(self):
1598  self.crosslinks = []
1599  self.external_csv_data = None
1600  self.crosslinkedprots = set()
1601  self.mindist = +10000000.0
1602  self.maxdist = -10000000.0
1603  self.contactmap = None
1604 
1605  def set_hierarchy(self, prot):
1606  self.prot_length_dict = {}
1607  self.model=prot.get_model()
1608 
1609  for i in prot.get_children():
1610  name = i.get_name()
1611  residue_indexes = []
1612  for p in IMP.atom.get_leaves(i):
1613  residue_indexes += IMP.pmi.tools.get_residue_indexes(p)
1614 
1615  if len(residue_indexes) != 0:
1616  self.prot_length_dict[name] = max(residue_indexes)
1617 
1618  def set_coordinates_for_contact_map(self, rmf_name,rmf_frame_index):
1619  from scipy.spatial.distance import cdist
1620 
1621  rh= RMF.open_rmf_file_read_only(rmf_name)
1622  prots=IMP.rmf.create_hierarchies(rh, self.model)
1623  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
1624  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
1625  del rh
1626 
1627 
1628  coords = []
1629  radii = []
1630  namelist = []
1631 
1632  particles_dictionary = get_particles_at_resolution_one(prots[0])
1633 
1634  resindex = 0
1635  self.index_dictionary = {}
1636 
1637  for name in particles_dictionary:
1638  residue_indexes = []
1639  for p in particles_dictionary[name]:
1640  print(p.get_name())
1641  residue_indexes = IMP.pmi.tools.get_residue_indexes(p)
1642  #residue_indexes.add( )
1643 
1644  if len(residue_indexes) != 0:
1645 
1646  for res in range(min(residue_indexes), max(residue_indexes) + 1):
1647  d = IMP.core.XYZR(p)
1648 
1649  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1650  coords.append(crd)
1651  radii.append(d.get_radius())
1652  if name not in self.index_dictionary:
1653  self.index_dictionary[name] = [resindex]
1654  else:
1655  self.index_dictionary[name].append(resindex)
1656  resindex += 1
1657 
1658  coords = np.array(coords)
1659  radii = np.array(radii)
1660 
1661  distances = cdist(coords, coords)
1662  distances = (distances - radii).T - radii
1663 
1664  distances = np.where(distances <= 20.0, 1.0, 0)
1665  if self.contactmap is None:
1666  self.contactmap = np.zeros((len(coords), len(coords)))
1667  self.contactmap += distances
1668 
1669  for prot in prots: IMP.atom.destroy(prot)
1670 
1671  def set_crosslinks(
1672  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1673  mapping=None,
1674  filter_label=None,
1675  filter_rmf_file_names=None, #provide a list of rmf base names to filter the stat file
1676  external_csv_data_file=None,
1677  external_csv_data_file_unique_id_key="Unique ID"):
1678 
 1679  # example key: ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None
 1680  # mapping is a dictionary that maps standard keywords to entry positions in the key string
 1681  # filter_label, if given, keeps only the keys whose split fields contain that label
 1682  # external_csv_data_file is a CSV file with further information on the cross-links;
 1683  # its unique ID column is used to create the dictionary keys (see the example call after this method)
1684 
1685  po = IMP.pmi.output.ProcessOutput(data_file)
1686  keys = po.get_keys()
1687 
1688  xl_keys = [k for k in keys if search_label in k]
1689 
1690  if filter_rmf_file_names is not None:
1691  rmf_file_key="local_rmf_file_name"
1692  fs = po.get_fields(xl_keys+[rmf_file_key])
1693  else:
1694  fs = po.get_fields(xl_keys)
1695 
 1696  # this dictionary stores the occurrence of each cross-link
1697  self.cross_link_frequency = {}
1698 
1699  # this dictionary stores the series of distances for given crosslinked
1700  # residues
1701  self.cross_link_distances = {}
1702 
 1703  # this dictionary stores the series of distances for each unique pair of
 1704  # crosslinked residues
1705  self.cross_link_distances_unique = {}
1706 
1707  if not external_csv_data_file is None:
1708  # this dictionary stores the further information on crosslinks
1709  # labeled by unique ID
1710  self.external_csv_data = {}
1711  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1712 
1713  for xl in xldb:
1714  self.external_csv_data[
1715  xl[external_csv_data_file_unique_id_key]] = xl
1716 
 1717  # this list keeps track of the cross-link tuples and samples,
 1718  # so that the same crosslinked residues are not counted twice in the
 1719  # same sample
1720  cross_link_frequency_list = []
1721 
1722  self.unique_cross_link_list = []
1723 
1724  for key in xl_keys:
1725  print(key)
1726  keysplit = key.replace(
1727  "_",
1728  " ").replace(
1729  "-",
1730  " ").replace(
1731  ":",
1732  " ").split(
1733  )
1734 
1735  if filter_label!=None:
1736  if filter_label not in keysplit: continue
1737 
1738  if mapping is None:
1739  r1 = int(keysplit[5])
1740  c1 = keysplit[6]
1741  r2 = int(keysplit[7])
1742  c2 = keysplit[8]
1743  try:
1744  confidence = keysplit[12]
1745  except:
1746  confidence = '0.0'
1747  try:
1748  unique_identifier = keysplit[3]
1749  except:
1750  unique_identifier = '0'
1751  else:
1752  r1 = int(keysplit[mapping["Residue1"]])
1753  c1 = keysplit[mapping["Protein1"]]
1754  r2 = int(keysplit[mapping["Residue2"]])
1755  c2 = keysplit[mapping["Protein2"]]
1756  try:
1757  confidence = keysplit[mapping["Confidence"]]
1758  except:
1759  confidence = '0.0'
1760  try:
1761  unique_identifier = keysplit[mapping["Unique Identifier"]]
1762  except:
1763  unique_identifier = '0'
1764 
1765  self.crosslinkedprots.add(c1)
1766  self.crosslinkedprots.add(c2)
1767 
1768  # construct the list of distances corresponding to the input rmf
1769  # files
1770 
1771  dists=[]
1772  if filter_rmf_file_names is not None:
1773  for n,d in enumerate(fs[key]):
1774  if fs[rmf_file_key][n] in filter_rmf_file_names:
1775  dists.append(float(d))
1776  else:
1777  dists=[float(f) for f in fs[key]]
1778 
 1779  # check whether the input confidence class corresponds to
 1780  # that of the cross-link
1781 
1782  mdist = self.median(dists)
1783 
1784  stdv = np.std(np.array(dists))
1785  if self.mindist > mdist:
1786  self.mindist = mdist
1787  if self.maxdist < mdist:
1788  self.maxdist = mdist
1789 
1790  # calculate the frequency of unique crosslinks within the same
1791  # sample
1792  if not self.external_csv_data is None:
1793  sample = self.external_csv_data[unique_identifier]["Sample"]
1794  else:
1795  sample = "None"
1796 
1797  if (r1, c1, r2, c2,mdist) not in cross_link_frequency_list:
1798  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1799  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1800  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1801  else:
1802  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1803  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1804  cross_link_frequency_list.append((r1, c1, r2, c2))
1805  cross_link_frequency_list.append((r2, c2, r1, c1))
1806  self.unique_cross_link_list.append(
1807  (r1, c1, r2, c2,mdist))
1808 
1809  if (r1, c1, r2, c2) not in self.cross_link_distances:
1810  self.cross_link_distances[(
1811  r1,
1812  c1,
1813  r2,
1814  c2,
1815  mdist,
1816  confidence)] = dists
1817  self.cross_link_distances[(
1818  r2,
1819  c2,
1820  r1,
1821  c1,
1822  mdist,
1823  confidence)] = dists
1824  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1825  else:
1826  self.cross_link_distances[(
1827  r2,
1828  c2,
1829  r1,
1830  c1,
1831  mdist,
1832  confidence)] += dists
1833  self.cross_link_distances[(
1834  r1,
1835  c1,
1836  r2,
1837  c2,
1838  mdist,
1839  confidence)] += dists
1840 
1841  self.crosslinks.append(
1842  (r1,
1843  c1,
1844  r2,
1845  c2,
1846  mdist,
1847  stdv,
1848  confidence,
1849  unique_identifier,
1850  'original'))
1851  self.crosslinks.append(
1852  (r2,
1853  c2,
1854  r1,
1855  c1,
1856  mdist,
1857  stdv,
1858  confidence,
1859  unique_identifier,
1860  'reversed'))
1861 
1862  self.cross_link_frequency_inverted = {}
1863  for xl in self.unique_cross_link_list:
1864  (r1, c1, r2, c2, mdist) = xl
1865  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1866  if frequency not in self.cross_link_frequency_inverted:
1867  self.cross_link_frequency_inverted[
1868  frequency] = [(r1, c1, r2, c2)]
1869  else:
1870  self.cross_link_frequency_inverted[
1871  frequency].append((r1, c1, r2, c2))
1872 
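A hypothetical call illustrating the mapping argument described above; the stat-file name is a placeholder, `hiers` comes from the earlier sketch, and the positions simply reproduce the default key layout (the key is split on '_', '-' and ':'):

xlt = CrossLinkTable()
xlt.set_hierarchy(hiers[0])            # hierarchy from the earlier sketch
# optional: load coordinates to draw a contact map under the cross-links
# xlt.set_coordinates_for_contact_map(rmf_file, 0)
xlt.set_crosslinks(
    "stat.out",                        # placeholder stat file
    search_label="ISDCrossLinkMS_Distance_",
    mapping={"Residue1": 5, "Protein1": 6,
             "Residue2": 7, "Protein2": 8,
             "Confidence": 12, "Unique Identifier": 3})
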
1873  # -------------
1874 
1875  def median(self, mylist):
1876  sorts = sorted(mylist)
1877  length = len(sorts)
1878  print(length)
1879  if length == 1:
1880  return mylist[0]
1881  if not length % 2:
 1882  return (sorts[length // 2] + sorts[length // 2 - 1]) / 2.0
 1883  return sorts[length // 2]
1884 
1885  def set_threshold(self,threshold):
1886  self.threshold=threshold
1887 
1888  def set_tolerance(self,tolerance):
1889  self.tolerance=tolerance
1890 
1891  def colormap(self, dist):
1892  if dist < self.threshold - self.tolerance:
1893  return "Green"
1894  elif dist >= self.threshold + self.tolerance:
1895  return "Orange"
1896  else:
1897  return "Red"
1898 
1899  def write_cross_link_database(self, filename, format='csv'):
1900  import csv
1901 
1902  fieldnames = [
1903  "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
1904  "Median Distance", "Standard Deviation", "Confidence", "Frequency", "Arrangement"]
1905 
1906  if not self.external_csv_data is None:
1907  keys = list(self.external_csv_data.keys())
1908  innerkeys = list(self.external_csv_data[keys[0]].keys())
1909  innerkeys.sort()
1910  fieldnames += innerkeys
1911 
1912  dw = csv.DictWriter(
1913  open(filename,
1914  "w"),
1915  delimiter=',',
1916  fieldnames=fieldnames)
1917  dw.writeheader()
1918  for xl in self.crosslinks:
1919  (r1, c1, r2, c2, mdist, stdv, confidence,
1920  unique_identifier, descriptor) = xl
1921  if descriptor == 'original':
1922  outdict = {}
1923  outdict["Unique ID"] = unique_identifier
1924  outdict["Protein1"] = c1
1925  outdict["Protein2"] = c2
1926  outdict["Residue1"] = r1
1927  outdict["Residue2"] = r2
1928  outdict["Median Distance"] = mdist
1929  outdict["Standard Deviation"] = stdv
1930  outdict["Confidence"] = confidence
1931  outdict["Frequency"] = self.cross_link_frequency[
1932  (r1, c1, r2, c2)]
1933  if c1 == c2:
1934  arrangement = "Intra"
1935  else:
1936  arrangement = "Inter"
1937  outdict["Arrangement"] = arrangement
1938  if not self.external_csv_data is None:
1939  outdict.update(self.external_csv_data[unique_identifier])
1940 
1941  dw.writerow(outdict)
1942 
1943  def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
1944  no_confidence_info=False, filter=None, layout="whole", crosslinkedonly=False,
1945  filename=None, confidence_classes=None, alphablend=0.1, scale_symbol_size=1.0,
1946  gap_between_components=0,
1947  rename_protein_map=None):
1948  # layout can be:
1949  # "lowerdiagonal" print only the lower diagonal plot
1950  # "upperdiagonal" print only the upper diagonal plot
1951  # "whole" print all
1952  # crosslinkedonly: plot only components that have crosslinks
 1953  # no_dist_info: if True, plot the cross-links only as grey spots
 1954  # filter = a tuple containing a keyword to be searched in the database,
 1955  # a relationship (">", "==", "<")
 1956  # and a value,
 1957  # for example ("ID_Score", ">", 40)
 1958  # scale_symbol_size rescales the cross-link symbols
 1959  # rename_protein_map is a dictionary used to rename proteins (see the example call after this method)
1960 
1961  import matplotlib as mpl
1962  mpl.use('Agg')
1963  import matplotlib.pyplot as plt
1964  import matplotlib.cm as cm
1965 
1966  fig = plt.figure(figsize=(10, 10))
1967  ax = fig.add_subplot(111)
1968 
1969  ax.set_xticks([])
1970  ax.set_yticks([])
1971 
1972  # set the list of proteins on the x axis
1973  if prot_listx is None:
1974  if crosslinkedonly:
1975  prot_listx = list(self.crosslinkedprots)
1976  else:
1977  prot_listx = list(self.prot_length_dict.keys())
1978  prot_listx.sort()
1979 
1980  nresx = gap_between_components + \
1981  sum([self.prot_length_dict[name]
1982  + gap_between_components for name in prot_listx])
1983 
1984  # set the list of proteins on the y axis
1985 
1986  if prot_listy is None:
1987  if crosslinkedonly:
1988  prot_listy = list(self.crosslinkedprots)
1989  else:
1990  prot_listy = list(self.prot_length_dict.keys())
1991  prot_listy.sort()
1992 
1993  nresy = gap_between_components + \
1994  sum([self.prot_length_dict[name]
1995  + gap_between_components for name in prot_listy])
1996 
1997  # this is the residue offset for each protein
1998  resoffsetx = {}
1999  resendx = {}
2000  res = gap_between_components
2001  for prot in prot_listx:
2002  resoffsetx[prot] = res
2003  res += self.prot_length_dict[prot]
2004  resendx[prot] = res
2005  res += gap_between_components
2006 
2007  resoffsety = {}
2008  resendy = {}
2009  res = gap_between_components
2010  for prot in prot_listy:
2011  resoffsety[prot] = res
2012  res += self.prot_length_dict[prot]
2013  resendy[prot] = res
2014  res += gap_between_components
2015 
2016  resoffsetdiagonal = {}
2017  res = gap_between_components
2018  for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2019  resoffsetdiagonal[prot] = res
2020  res += self.prot_length_dict[prot]
2021  res += gap_between_components
2022 
2023  # plot protein boundaries
2024 
2025  xticks = []
2026  xlabels = []
2027  for n, prot in enumerate(prot_listx):
2028  res = resoffsetx[prot]
2029  end = resendx[prot]
2030  for proty in prot_listy:
2031  resy = resoffsety[proty]
2032  endy = resendy[proty]
2033  ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
2034  ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
2035  xticks.append((float(res) + float(end)) / 2)
2036  if rename_protein_map is not None:
2037  if prot in rename_protein_map:
2038  prot=rename_protein_map[prot]
2039  xlabels.append(prot)
2040 
2041  yticks = []
2042  ylabels = []
2043  for n, prot in enumerate(prot_listy):
2044  res = resoffsety[prot]
2045  end = resendy[prot]
2046  for protx in prot_listx:
2047  resx = resoffsetx[protx]
2048  endx = resendx[protx]
2049  ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
2050  ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
2051  yticks.append((float(res) + float(end)) / 2)
2052  if rename_protein_map is not None:
2053  if prot in rename_protein_map:
2054  prot=rename_protein_map[prot]
2055  ylabels.append(prot)
2056 
2057  # plot the contact map
2058  print(prot_listx, prot_listy)
2059 
2060  if not self.contactmap is None:
2061  import matplotlib.cm as cm
2062  tmp_array = np.zeros((nresx, nresy))
2063 
2064  for px in prot_listx:
2065  print(px)
2066  for py in prot_listy:
2067  print(py)
 2068  resx = resoffsetx[px]
2069  lengx = resendx[px] - 1
2070  resy = resoffsety[py]
2071  lengy = resendy[py] - 1
2072  indexes_x = self.index_dictionary[px]
2073  minx = min(indexes_x)
2074  maxx = max(indexes_x)
2075  indexes_y = self.index_dictionary[py]
2076  miny = min(indexes_y)
2077  maxy = max(indexes_y)
2078 
2079  print(px, py, minx, maxx, miny, maxy)
2080 
2081  try:
2082  tmp_array[
2083  resx:lengx,
2084  resy:lengy] = self.contactmap[
2085  minx:maxx,
2086  miny:maxy]
2087  except:
2088  continue
2089 
2090 
2091  ax.imshow(tmp_array,
2092  cmap=cm.binary,
2093  origin='lower',
2094  interpolation='nearest')
2095 
2096  ax.set_xticks(xticks)
2097  ax.set_xticklabels(xlabels, rotation=90)
2098  ax.set_yticks(yticks)
2099  ax.set_yticklabels(ylabels)
2100  ax.set_xlim(0,nresx)
2101  ax.set_ylim(0,nresy)
2102 
2103 
2104  # set the crosslinks
2105 
2106  already_added_xls = []
2107 
2108  for xl in self.crosslinks:
2109 
2110  (r1, c1, r2, c2, mdist, stdv, confidence,
2111  unique_identifier, descriptor) = xl
2112 
2113  if confidence_classes is not None:
2114  if confidence not in confidence_classes:
2115  continue
2116 
2117  try:
2118  pos1 = r1 + resoffsetx[c1]
2119  except:
2120  continue
2121  try:
2122  pos2 = r2 + resoffsety[c2]
2123  except:
2124  continue
2125 
2126  if not filter is None:
2127  xldb = self.external_csv_data[unique_identifier]
2128  xldb.update({"Distance": mdist})
2129  xldb.update({"Distance_stdv": stdv})
2130 
2131  if filter[1] == ">":
2132  if float(xldb[filter[0]]) <= float(filter[2]):
2133  continue
2134 
2135  if filter[1] == "<":
2136  if float(xldb[filter[0]]) >= float(filter[2]):
2137  continue
2138 
2139  if filter[1] == "==":
2140  if float(xldb[filter[0]]) != float(filter[2]):
2141  continue
2142 
 2143  # everything below is used for plotting the diagonal
 2144  # when the plot is rectangular
2145 
2146  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
2147  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
2148 
2149  if layout == 'lowerdiagonal':
2150  if pos_for_diagonal1 <= pos_for_diagonal2:
2151  continue
2152  if layout == 'upperdiagonal':
2153  if pos_for_diagonal1 >= pos_for_diagonal2:
2154  continue
2155 
2156  already_added_xls.append((r1, c1, r2, c2))
2157 
2158  if not no_confidence_info:
2159  if confidence == '0.01':
2160  markersize = 14 * scale_symbol_size
2161  elif confidence == '0.05':
2162  markersize = 9 * scale_symbol_size
2163  elif confidence == '0.1':
2164  markersize = 6 * scale_symbol_size
2165  else:
2166  markersize = 15 * scale_symbol_size
2167  else:
2168  markersize = 5 * scale_symbol_size
2169 
2170  if not no_dist_info:
2171  color = self.colormap(mdist)
2172  else:
2173  color = "gray"
2174 
2175  ax.plot(
2176  [pos1],
2177  [pos2],
2178  'o',
2179  c=color,
2180  alpha=alphablend,
2181  markersize=markersize)
2182 
2183 
2184 
2185  fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
2186 
2187  [i.set_linewidth(2.0) for i in ax.spines.values()]
2188 
2189  #plt.tight_layout()
2190 
2191  if filename:
 2192  plt.savefig(filename + ".pdf", dpi=300, transparent=False)
2193  else:
2194  plt.show()
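An illustrative call of the plot options documented above (all values are arbitrary). Note that set_threshold() and set_tolerance() must have been called so that colormap() can classify the distances, and the filter argument requires an external_csv_data_file to have been loaded via set_crosslinks():

xlt.set_threshold(35.0)
xlt.set_tolerance(5.0)
xlt.plot(layout="lowerdiagonal",
         crosslinkedonly=True,
         filter=("ID_Score", ">", 40),   # needs external CSV data
         scale_symbol_size=1.5,
         filename="xl_map")              # writes xl_map.pdf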
2195 
2196  def get_frequency_statistics(self, prot_list,
2197  prot_list2=None):
2198 
2199  violated_histogram = {}
2200  satisfied_histogram = {}
2201  unique_cross_links = []
2202 
2203  for xl in self.unique_cross_link_list:
2204  (r1, c1, r2, c2, mdist) = xl
2205 
2206  # here we filter by the protein
2207  if prot_list2 is None:
2208  if not c1 in prot_list:
2209  continue
2210  if not c2 in prot_list:
2211  continue
2212  else:
2213  if c1 in prot_list and c2 in prot_list2:
2214  pass
2215  elif c1 in prot_list2 and c2 in prot_list:
2216  pass
2217  else:
2218  continue
2219 
2220  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2221 
2222  if (r1, c1, r2, c2) not in unique_cross_links:
2223  if mdist > 35.0:
2224  if frequency not in violated_histogram:
2225  violated_histogram[frequency] = 1
2226  else:
2227  violated_histogram[frequency] += 1
2228  else:
2229  if frequency not in satisfied_histogram:
2230  satisfied_histogram[frequency] = 1
2231  else:
2232  satisfied_histogram[frequency] += 1
2233  unique_cross_links.append((r1, c1, r2, c2))
2234  unique_cross_links.append((r2, c2, r1, c1))
2235 
2236  print("# satisfied")
2237 
2238  total_number_of_crosslinks=0
2239 
2240  for i in satisfied_histogram:
2241  # if i in violated_histogram:
2242  # print i, satisfied_histogram[i]+violated_histogram[i]
2243  # else:
2244  if i in violated_histogram:
2245  print(i, violated_histogram[i]+satisfied_histogram[i])
2246  else:
2247  print(i, satisfied_histogram[i])
2248  total_number_of_crosslinks+=i*satisfied_histogram[i]
2249 
2250  print("# violated")
2251 
2252  for i in violated_histogram:
2253  print(i, violated_histogram[i])
2254  total_number_of_crosslinks+=i*violated_histogram[i]
2255 
2256  print(total_number_of_crosslinks)
2257 
2258 
2259 # ------------
2260  def print_cross_link_binary_symbols(self, prot_list,
2261  prot_list2=None):
2262  tmp_matrix = []
2263  confidence_list = []
2264  for xl in self.crosslinks:
2265  (r1, c1, r2, c2, mdist, stdv, confidence,
2266  unique_identifier, descriptor) = xl
2267 
2268  if prot_list2 is None:
2269  if not c1 in prot_list:
2270  continue
2271  if not c2 in prot_list:
2272  continue
2273  else:
2274  if c1 in prot_list and c2 in prot_list2:
2275  pass
2276  elif c1 in prot_list2 and c2 in prot_list:
2277  pass
2278  else:
2279  continue
2280 
2281  if descriptor != "original":
2282  continue
2283 
2284  confidence_list.append(confidence)
2285 
2286  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2287  tmp_dist_binary = []
2288  for d in dists:
2289  if d < 35:
2290  tmp_dist_binary.append(1)
2291  else:
2292  tmp_dist_binary.append(0)
2293  tmp_matrix.append(tmp_dist_binary)
2294 
2295  matrix = list(zip(*tmp_matrix))
2296 
2297  satisfied_high_sum = 0
2298  satisfied_mid_sum = 0
2299  satisfied_low_sum = 0
2300  total_satisfied_sum = 0
2301  for k, m in enumerate(matrix):
2302  satisfied_high = 0
2303  total_high = 0
2304  satisfied_mid = 0
2305  total_mid = 0
2306  satisfied_low = 0
2307  total_low = 0
2308  total_satisfied = 0
2309  total = 0
2310  for n, b in enumerate(m):
2311  if confidence_list[n] == "0.01":
2312  total_high += 1
2313  if b == 1:
2314  satisfied_high += 1
2315  satisfied_high_sum += 1
2316  elif confidence_list[n] == "0.05":
2317  total_mid += 1
2318  if b == 1:
2319  satisfied_mid += 1
2320  satisfied_mid_sum += 1
2321  elif confidence_list[n] == "0.1":
2322  total_low += 1
2323  if b == 1:
2324  satisfied_low += 1
2325  satisfied_low_sum += 1
2326  if b == 1:
2327  total_satisfied += 1
2328  total_satisfied_sum += 1
2329  total += 1
2330  print(k, satisfied_high, total_high)
2331  print(k, satisfied_mid, total_mid)
2332  print(k, satisfied_low, total_low)
2333  print(k, total_satisfied, total)
2334  print(float(satisfied_high_sum) / len(matrix))
2335  print(float(satisfied_mid_sum) / len(matrix))
2336  print(float(satisfied_low_sum) / len(matrix))
2337 # ------------
2338 
2339  def get_unique_crosslinks_statistics(self, prot_list,
2340  prot_list2=None):
2341 
2342  print(prot_list)
2343  print(prot_list2)
2344  satisfied_high = 0
2345  total_high = 0
2346  satisfied_mid = 0
2347  total_mid = 0
2348  satisfied_low = 0
2349  total_low = 0
2350  total = 0
2351  tmp_matrix = []
2352  satisfied_string = []
2353  for xl in self.crosslinks:
2354  (r1, c1, r2, c2, mdist, stdv, confidence,
2355  unique_identifier, descriptor) = xl
2356 
2357  if prot_list2 is None:
2358  if not c1 in prot_list:
2359  continue
2360  if not c2 in prot_list:
2361  continue
2362  else:
2363  if c1 in prot_list and c2 in prot_list2:
2364  pass
2365  elif c1 in prot_list2 and c2 in prot_list:
2366  pass
2367  else:
2368  continue
2369 
2370  if descriptor != "original":
2371  continue
2372 
2373  total += 1
2374  if confidence == "0.01":
2375  total_high += 1
2376  if mdist <= 35:
2377  satisfied_high += 1
2378  if confidence == "0.05":
2379  total_mid += 1
2380  if mdist <= 35:
2381  satisfied_mid += 1
2382  if confidence == "0.1":
2383  total_low += 1
2384  if mdist <= 35:
2385  satisfied_low += 1
2386  if mdist <= 35:
2387  satisfied_string.append(1)
2388  else:
2389  satisfied_string.append(0)
2390 
2391  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2392  tmp_dist_binary = []
2393  for d in dists:
2394  if d < 35:
2395  tmp_dist_binary.append(1)
2396  else:
2397  tmp_dist_binary.append(0)
2398  tmp_matrix.append(tmp_dist_binary)
2399 
2400  print("unique satisfied_high/total_high", satisfied_high, "/", total_high)
2401  print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
2402  print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
2403  print("total", total)
2404 
2405  matrix = list(zip(*tmp_matrix))
2406  satisfied_models = 0
2407  satstr = ""
2408  for b in satisfied_string:
2409  if b == 0:
2410  satstr += "-"
2411  if b == 1:
2412  satstr += "*"
2413 
2414  for m in matrix:
2415  all_satisfied = True
2416  string = ""
2417  for n, b in enumerate(m):
2418  if b == 0:
2419  string += "0"
2420  if b == 1:
2421  string += "1"
2422  if b == 1 and satisfied_string[n] == 1:
2423  pass
2424  elif b == 1 and satisfied_string[n] == 0:
2425  pass
2426  elif b == 0 and satisfied_string[n] == 0:
2427  pass
2428  elif b == 0 and satisfied_string[n] == 1:
2429  all_satisfied = False
2430  if all_satisfied:
2431  satisfied_models += 1
2432  print(string)
2433  print(satstr, all_satisfied)
2434  print("models that satisfies the median satisfied crosslinks/total models", satisfied_models, len(matrix))
2435 
2436  def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
2437  prot_list2=None):
2438 
2439  import matplotlib as mpl
2440  mpl.use('Agg')
2441  import matplotlib.pylab as pl
2442 
2443  tmp_matrix = []
2444  for kw in self.cross_link_distances_unique:
2445  (r1, c1, r2, c2) = kw
2446  dists = self.cross_link_distances_unique[kw]
2447 
2448  if prot_list2 is None:
2449  if not c1 in prot_list:
2450  continue
2451  if not c2 in prot_list:
2452  continue
2453  else:
2454  if c1 in prot_list and c2 in prot_list2:
2455  pass
2456  elif c1 in prot_list2 and c2 in prot_list:
2457  pass
2458  else:
2459  continue
2460  # append the sum of dists to order by that in the matrix plot
2461  dists.append(sum(dists))
2462  tmp_matrix.append(dists)
2463 
2464  tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
2465 
2466  # print len(tmp_matrix), len(tmp_matrix[0])-1
2467  matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
2468 
2469  for i in range(len(tmp_matrix)):
2470  for k in range(len(tmp_matrix[i]) - 1):
2471  matrix[i][k] = tmp_matrix[i][k]
2472 
2473  print(matrix)
2474 
2475  fig = pl.figure()
2476  ax = fig.add_subplot(211)
2477 
2478  cax = ax.imshow(matrix, interpolation='nearest')
2479  # ax.set_yticks(range(len(self.model_list_names)))
2480  #ax.set_yticklabels( [self.model_list_names[i] for i in leaves_order] )
2481  fig.colorbar(cax)
2482  pl.savefig(figurename, dpi=300)
2483  pl.show()
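A hypothetical call of the matrix plot above; the figure name and protein names are placeholders:

xlt.plot_matrix_cross_link_distances_unique(
    "xl_distance_matrix.png",
    prot_list=["ProtA", "ProtB"])      # placeholder component names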
2484 
2485  def plot_bars(
2486  self,
2487  filename,
2488  prots1,
2489  prots2,
2490  nxl_per_row=20,
2491  arrangement="inter",
2492  confidence_input="None"):
2493 
2494  data = []
2495  for xl in self.cross_link_distances:
2496  (r1, c1, r2, c2, mdist, confidence) = xl
2497  if c1 in prots1 and c2 in prots2:
2498  if arrangement == "inter" and c1 == c2:
2499  continue
2500  if arrangement == "intra" and c1 != c2:
2501  continue
2502  if confidence_input == confidence:
2503  label = str(c1) + ":" + str(r1) + \
2504  "-" + str(c2) + ":" + str(r2)
2505  values = self.cross_link_distances[xl]
2506  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2507  data.append((label, values, mdist, frequency))
2508 
2509  sort_by_dist = sorted(data, key=lambda tup: tup[2])
2510  sort_by_dist = list(zip(*sort_by_dist))
2511  values = sort_by_dist[1]
2512  positions = list(range(len(values)))
2513  labels = sort_by_dist[0]
2514  frequencies = list(map(float, sort_by_dist[3]))
2515  frequencies = [f * 10.0 for f in frequencies]
2516 
2517  nchunks = int(float(len(values)) / nxl_per_row)
2518  values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
2519  positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
2520  positions,
2521  nchunks)
2522  frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
2523  frequencies,
2524  nchunks)
2525  labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)
2526 
2527  for n, v in enumerate(values_chunks):
2528  p = positions_chunks[n]
2529  f = frequencies_chunks[n]
2530  l = labels_chunks[n]
 2531  IMP.pmi.output.plot_fields_box_plots(
 2532  filename + "." + str(n), v, p, f,
2533  valuename="Distance (Ang)", positionname="Unique " + arrangement + " Crosslinks", xlabels=l)
2534 
2535  def crosslink_distance_histogram(self, filename,
2536  prot_list=None,
2537  prot_list2=None,
2538  confidence_classes=None,
2539  bins=40,
2540  color='#66CCCC',
2541  yplotrange=[0, 1],
2542  format="png",
2543  normalized=False):
2544  if prot_list is None:
2545  prot_list = list(self.prot_length_dict.keys())
2546 
2547  distances = []
2548  for xl in self.crosslinks:
2549  (r1, c1, r2, c2, mdist, stdv, confidence,
2550  unique_identifier, descriptor) = xl
2551 
2552  if not confidence_classes is None:
2553  if confidence not in confidence_classes:
2554  continue
2555 
2556  if prot_list2 is None:
2557  if not c1 in prot_list:
2558  continue
2559  if not c2 in prot_list:
2560  continue
2561  else:
2562  if c1 in prot_list and c2 in prot_list2:
2563  pass
2564  elif c1 in prot_list2 and c2 in prot_list:
2565  pass
2566  else:
2567  continue
2568 
2569  distances.append(mdist)
2570 
 2571  IMP.pmi.output.plot_field_histogram(
 2572  filename, distances, valuename="C-alpha C-alpha distance [Ang]",
2573  bins=bins, color=color,
2574  format=format,
2575  reference_xline=35.0,
2576  yplotrange=yplotrange, normalized=normalized)
2577 
2578  def scatter_plot_xl_features(self, filename,
2579  feature1=None,
2580  feature2=None,
2581  prot_list=None,
2582  prot_list2=None,
2583  yplotrange=None,
2584  reference_ylines=None,
2585  distance_color=True,
2586  format="png"):
2587  import matplotlib as mpl
2588  mpl.use('Agg')
2589  import matplotlib.pyplot as plt
2590  import matplotlib.cm as cm
2591 
2592  fig = plt.figure(figsize=(10, 10))
2593  ax = fig.add_subplot(111)
2594 
2595  for xl in self.crosslinks:
2596  (r1, c1, r2, c2, mdist, stdv, confidence,
2597  unique_identifier, arrangement) = xl
2598 
2599  if prot_list2 is None:
2600  if not c1 in prot_list:
2601  continue
2602  if not c2 in prot_list:
2603  continue
2604  else:
2605  if c1 in prot_list and c2 in prot_list2:
2606  pass
2607  elif c1 in prot_list2 and c2 in prot_list:
2608  pass
2609  else:
2610  continue
2611 
2612  xldb = self.external_csv_data[unique_identifier]
2613  xldb.update({"Distance": mdist})
2614  xldb.update({"Distance_stdv": stdv})
2615 
2616  xvalue = float(xldb[feature1])
2617  yvalue = float(xldb[feature2])
2618 
2619  if distance_color:
2620  color = self.colormap(mdist)
2621  else:
2622  color = "gray"
2623 
2624  ax.plot([xvalue], [yvalue], 'o', c=color, alpha=0.1, markersize=7)
2625 
2626  if not yplotrange is None:
2627  ax.set_ylim(yplotrange)
2628  if not reference_ylines is None:
2629  for rl in reference_ylines:
2630  ax.axhline(rl, color='red', linestyle='dashed', linewidth=1)
2631 
2632  if filename:
 2633  plt.savefig(filename + "." + format, dpi=150, transparent=False)
2634 
2635  plt.show()
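Finally, a short sketch of the remaining reporting helpers, continuing the hypothetical `xlt` object from the examples above (all output names are placeholders):

# write one CSV row per cross-link, including frequency and median distance
xlt.write_cross_link_database("crosslinks.csv")
# histogram of median C-alpha - C-alpha distances for selected confidence classes
xlt.crosslink_distance_histogram("xl_distances",
                                 confidence_classes=["0.01", "0.05"],
                                 bins=40, format="png")
# print satisfied/violated counts per cross-link frequency
xlt.get_frequency_statistics(prot_list=list(xlt.prot_length_dict.keys()))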