IMP logo
IMP Reference Guide  2.6.2
The Integrative Modeling Platform
pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 from __future__ import print_function
7 import IMP
8 import IMP.algebra
9 import IMP.em
10 import IMP.pmi
11 import IMP.pmi.tools
12 import IMP.pmi.output
13 import IMP.rmf
14 import RMF
15 import IMP.pmi.analysis
16 from operator import itemgetter
17 from copy import deepcopy
18 from math import log,sqrt
19 import itertools
20 import numpy as np
21 
22 
23 class Alignment(object):
24  """Performs alignment and RMSD calculation for two sets of coordinates
25 
26  The class also takes into account non-equal stoichiometry of the proteins.
27  If this is the case, the protein names of proteins in multiple copies
28  should be specified in the following form:
29  nameA..1, nameA..2 (note two dots).
30  """
31 
32  def __init__(self, template, query, weights=None):
33  """Constructor.
34  @param query {'p1':coords(L,3), 'p2':coords(L,3)}
35  @param template {'p1':coords(L,3), 'p2':coords(L,3)}
36  @param weights optional weights for each set of coordinates
37  """
38  self.query = query
39  self.template = template
40  self.weights=weights
41 
42  if len(self.query.keys()) != len(self.template.keys()):
43  raise ValueError('''the number of proteins
44  in template and query does not match!''')
45 
46  def permute(self):
47 
48  self.proteins = sorted(self.query.keys())
49  prots_uniq = [i.split('..')[0] for i in self.proteins]
50  P = {}
51  for p in prots_uniq:
52  np = prots_uniq.count(p)
53  copies = [i for i in self.proteins if i.split('..')[0] == p]
54  prmts = list(itertools.permutations(copies, len(copies)))
55  P[p] = prmts
56  self.P = P
57  self.Product = list(itertools.product(*P.values()))
58 
59  def get_rmsd(self):
60 
61  self.permute()
62 
63  template_xyz = []
64  weights = []
65  torder = sum([list(i) for i in self.Product[0]], [])
66  for t in torder:
67  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
68  if self.weights is not None:
69  weights += [i for i in self.weights[t]]
70  #template_xyz = np.array(template_xyz)
71 
72  self.rmsd = 10000000000.
73  for comb in self.Product:
74 
75 
76 
77  order = sum([list(i) for i in comb], [])
78  query_xyz = []
79  for p in order:
80  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
81  #query_xyz = np.array(query_xyz)
82  #if len(template_xyz) != len(query_xyz):
83  # print '''Alignment.get_rmsd: ERROR: the number of coordinates
84  # in template and query does not match!'''
85  # exit()
86 
87  if self.weights is not None:
88  dist=IMP.algebra.get_weighted_rmsd(template_xyz, query_xyz, weights)
89  else:
90  dist=IMP.algebra.get_rmsd(template_xyz, query_xyz)
91  #dist = sqrt(
92  # sum(np.diagonal(cdist(template_xyz, query_xyz) ** 2)) / len(template_xyz))
93  if dist < self.rmsd:
94  self.rmsd = dist
95  return self.rmsd
96 
97  def align(self):
98  from scipy.spatial.distance import cdist
99 
100  self.permute()
101 
102  template_xyz = []
103  torder = sum([list(i) for i in self.Product[0]], [])
104  for t in torder:
105  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
106  #template_xyz = np.array(template_xyz)
107 
108  self.rmsd, Transformation = 10000000000., ''
109  for comb in self.Product:
110  order = sum([list(i) for i in comb], [])
111  query_xyz = []
112  for p in order:
113  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
114  #query_xyz = np.array(query_xyz)
115 
116  if len(template_xyz) != len(query_xyz):
117  raise ValueError('''the number of coordinates
118  in template and query does not match!''')
119 
121  query_xyz,
122  template_xyz)
123  query_xyz_tr = [transformation.get_transformed(n)
124  for n in query_xyz]
125 
126  dist = sqrt(
127  sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2)) / len(template_xyz))
128  if dist < self.rmsd:
129  self.rmsd = dist
130  Transformation = transformation
131 
132  return (self.rmsd, Transformation)
133 
134 
135 # TEST for the alignment ###
136 """
137 Proteins = {'a..1':np.array([np.array([-1.,1.])]),
138  'a..2':np.array([np.array([1.,1.,])]),
139  'a..3':np.array([np.array([-2.,1.])]),
140  'b':np.array([np.array([0.,-1.])]),
141  'c..1':np.array([np.array([-1.,-1.])]),
142  'c..2':np.array([np.array([1.,-1.])]),
143  'd':np.array([np.array([0.,0.])]),
144  'e':np.array([np.array([0.,1.])])}
145 
146 Ali = Alignment(Proteins, Proteins)
147 Ali.permute()
148 if Ali.get_rmsd() == 0.0: print 'successful test!'
149 else: print 'ERROR!'; exit()
150 """
151 
152 
153 # ----------------------------------
154 class Violations(object):
155 
156  def __init__(self, filename):
157 
158  self.violation_thresholds = {}
159  self.violation_counts = {}
160 
161  data = open(filename)
162  D = data.readlines()
163  data.close()
164 
165  for d in D:
166  d = d.strip().split()
167  self.violation_thresholds[d[0]] = float(d[1])
168 
169  def get_number_violated_restraints(self, rsts_dict):
170  num_violated = 0
171  for rst in self.violation_thresholds:
172  if rst not in rsts_dict:
173  continue # print rst;
174  if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
175  num_violated += 1
176  if rst not in self.violation_counts:
177  self.violation_counts[rst] = 1
178  else:
179  self.violation_counts[rst] += 1
180  return num_violated
181 
182 
183 # ----------------------------------
184 class Clustering(object):
185  """A class to cluster structures.
186  Uses scipy's cdist function to compute distance matrices
187  and sklearn's kmeans clustering module.
188  """
189  def __init__(self,rmsd_weights=None):
190  """Constructor.
191  @param rmsd_weights Flat list of weights for each particle
192  (if they're coarse)
193  """
194  try:
195  from mpi4py import MPI
196  self.comm = MPI.COMM_WORLD
197  self.rank = self.comm.Get_rank()
198  self.number_of_processes = self.comm.size
199  except ImportError:
200  self.number_of_processes = 1
201  self.rank = 0
202  self.all_coords = {}
203  self.structure_cluster_ids = None
204  self.tmpl_coords = None
205  self.rmsd_weights=rmsd_weights
206 
207  def set_template(self, part_coords):
208 
209  self.tmpl_coords = part_coords
210 
211  def fill(self, frame, Coords):
212  """Add coordinates for a single model."""
213 
214  self.all_coords[frame] = Coords
215 
216  def dist_matrix(self):
217 
218  self.model_list_names = list(self.all_coords.keys())
219  self.model_indexes = list(range(len(self.model_list_names)))
220  self.model_indexes_dict = dict(
221  list(zip(self.model_list_names, self.model_indexes)))
222  model_indexes_unique_pairs = list(itertools.combinations(self.model_indexes, 2))
223 
224  my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
225  model_indexes_unique_pairs,
226  self.number_of_processes)[self.rank]
227 
228  print("process %s assigned with %s pairs" % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
229 
230  (raw_distance_dict, self.transformation_distance_dict) = self.matrix_calculation(self.all_coords,
231  self.tmpl_coords,
232  my_model_indexes_unique_pairs)
233 
234  if self.number_of_processes > 1:
235  raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
236  raw_distance_dict)
237  pickable_transformations = self.get_pickable_transformation_distance_dict(
238  )
239  pickable_transformations = IMP.pmi.tools.scatter_and_gather(
240  pickable_transformations)
241  self.set_transformation_distance_dict_from_pickable(
242  pickable_transformations)
243 
244  self.raw_distance_matrix = np.zeros(
245  (len(self.model_list_names), len(self.model_list_names)))
246  for item in raw_distance_dict:
247  (f1, f2) = item
248  self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
249  self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
250 
251  def get_dist_matrix(self):
252  return self.raw_distance_matrix
253 
254  def do_cluster(self, number_of_clusters,seed=None):
255  """Run K-means clustering
256  @param number_of_clusters Num means
257  @param seed the random seed
258  """
259  from sklearn.cluster import KMeans
260  if seed is not None:
261  np.random.seed(seed)
262  try:
263  # check whether we have the right version of sklearn
264  kmeans = KMeans(n_clusters=number_of_clusters)
265  except TypeError:
266  # sklearn older than 0.12
267  kmeans = KMeans(k=number_of_clusters)
268  kmeans.fit_predict(self.raw_distance_matrix)
269 
270  self.structure_cluster_ids = kmeans.labels_
271 
272  def get_pickable_transformation_distance_dict(self):
273  pickable_transformations = {}
274  for label in self.transformation_distance_dict:
275  tr = self.transformation_distance_dict[label]
276  trans = tuple(tr.get_translation())
277  rot = tuple(tr.get_rotation().get_quaternion())
278  pickable_transformations[label] = (rot, trans)
279  return pickable_transformations
280 
281  def set_transformation_distance_dict_from_pickable(
282  self,
283  pickable_transformations):
284  self.transformation_distance_dict = {}
285  for label in pickable_transformations:
286  tr = pickable_transformations[label]
287  trans = IMP.algebra.Vector3D(tr[1])
288  rot = IMP.algebra.Rotation3D(tr[0])
289  self.transformation_distance_dict[
290  label] = IMP.algebra.Transformation3D(rot, trans)
291 
292  def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
293  import pickle
294  outf = open(file_name + ".data", 'wb')
295 
296  # to pickle the transformation dictionary
297  # you have to save the arrays correposnding to
298  # the transformations
299 
300  pickable_transformations = self.get_pickable_transformation_distance_dict(
301  )
302  pickle.dump(
303  (self.structure_cluster_ids,
304  self.model_list_names,
305  pickable_transformations),
306  outf)
307 
308  np.save(file_name + ".npy", self.raw_distance_matrix)
309 
310  def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
311  import pickle
312 
313  inputf = open(file_name + ".data", 'rb')
314  (self.structure_cluster_ids, self.model_list_names,
315  pickable_transformations) = pickle.load(inputf)
316  inputf.close()
317 
318  self.raw_distance_matrix = np.load(file_name + ".npy")
319 
320  self.set_transformation_distance_dict_from_pickable(
321  pickable_transformations)
322  self.model_indexes = list(range(len(self.model_list_names)))
323  self.model_indexes_dict = dict(
324  list(zip(self.model_list_names, self.model_indexes)))
325 
326  def plot_matrix(self, figurename="clustermatrix.pdf"):
327  import matplotlib as mpl
328  mpl.use('Agg')
329  import matplotlib.pylab as pl
330  from scipy.cluster import hierarchy as hrc
331 
332  fig = pl.figure(figsize=(10,8))
333  ax = fig.add_subplot(212)
334  dendrogram = hrc.dendrogram(
335  hrc.linkage(self.raw_distance_matrix),
336  color_threshold=7,
337  no_labels=True)
338  leaves_order = dendrogram['leaves']
339  ax.set_xlabel('Model')
340  ax.set_ylabel('RMSD [Angstroms]')
341 
342  ax2 = fig.add_subplot(221)
343  cax = ax2.imshow(
344  self.raw_distance_matrix[leaves_order,
345  :][:,
346  leaves_order],
347  interpolation='nearest')
348  cb = fig.colorbar(cax)
349  cb.set_label('RMSD [Angstroms]')
350  ax2.set_xlabel('Model')
351  ax2.set_ylabel('Model')
352 
353  pl.savefig(figurename, dpi=300)
354  pl.close(fig)
355 
356  def get_model_index_from_name(self, name):
357  return self.model_indexes_dict[name]
358 
359  def get_cluster_labels(self):
360  # this list
361  return list(set(self.structure_cluster_ids))
362 
363  def get_number_of_clusters(self):
364  return len(self.get_cluster_labels())
365 
366  def get_cluster_label_indexes(self, label):
367  return (
368  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
369  )
370 
371  def get_cluster_label_names(self, label):
372  return (
373  [self.model_list_names[i]
374  for i in self.get_cluster_label_indexes(label)]
375  )
376 
377  def get_cluster_label_average_rmsd(self, label):
378 
379  indexes = self.get_cluster_label_indexes(label)
380 
381  if len(indexes) > 1:
382  sub_distance_matrix = self.raw_distance_matrix[
383  indexes, :][:, indexes]
384  average_rmsd = np.sum(sub_distance_matrix) / \
385  (len(sub_distance_matrix)
386  ** 2 - len(sub_distance_matrix))
387  else:
388  average_rmsd = 0.0
389  return average_rmsd
390 
391  def get_cluster_label_size(self, label):
392  return len(self.get_cluster_label_indexes(label))
393 
394  def get_transformation_to_first_member(
395  self,
396  cluster_label,
397  structure_index):
398  reference = self.get_cluster_label_indexes(cluster_label)[0]
399  return self.transformation_distance_dict[(reference, structure_index)]
400 
401  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
402 
403  model_list_names = list(all_coords.keys())
404  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
405  raw_distance_dict = {}
406  transformation_distance_dict = {}
407  if template_coords is None:
408  do_alignment = False
409  else:
410  do_alignment = True
411  alignment_template_protein_names = list(template_coords.keys())
412 
413  for (f1, f2) in list_of_pairs:
414 
415  if not do_alignment:
416  # here we only get the rmsd,
417  # we need that for instance when you want to cluster conformations
418  # globally, eg the EM map is a reference
420 
421  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
422  for pr in rmsd_protein_names])
423  coords_f2 = {}
424  for pr in rmsd_protein_names:
425  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
426 
427  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
428  rmsd = Ali.get_rmsd()
429 
430  elif do_alignment:
431  # here we actually align the conformations first
432  # and than calculate the rmsd. We need that when the
433  # protein(s) is the reference
434  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
435  for pr in alignment_template_protein_names])
436  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
437  for pr in alignment_template_protein_names])
438 
439  Ali = Alignment(coords_f1, coords_f2)
440  template_rmsd, transformation = Ali.align()
441 
442  # here we calculate the rmsd
443  # we will align two models based n the nuber of subunits provided
444  # and transform coordinates of model 2 to model 1
445  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
446  for pr in rmsd_protein_names])
447  coords_f2 = {}
448  for pr in rmsd_protein_names:
449  coords_f2[pr] = [transformation.get_transformed(
450  i) for i in all_coords[model_list_names[f2]][pr]]
451 
452  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
453  rmsd = Ali.get_rmsd()
454 
455  raw_distance_dict[(f1, f2)] = rmsd
456  raw_distance_dict[(f2, f1)] = rmsd
457  transformation_distance_dict[(f1, f2)] = transformation
458  transformation_distance_dict[(f2, f1)] = transformation
459 
460  return raw_distance_dict, transformation_distance_dict
461 
462 
463 class Precision(object):
464  """A class to evaluate the precision of an ensemble.
465 
466  Also can evaluate the cross-precision of multiple ensembles.
467  Supports MPI for coordinate reading.
468  Recommended procedure:
469  -# initialize object and pass the selection for evaluating precision
470  -# call add_structures() to read in the data (specify group name)
471  -# call get_precision() to evaluate inter/intra precision
472  -# call get_rmsf() to evaluate within-group fluctuations
473  """
474  def __init__(self,model,
475  resolution=1,
476  selection_dictionary={}):
477  """Constructor.
478  @param model The IMP Model
479  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
480  part of the hier name)
481  @param selection_dictionary Dictionary where keys are names for
482  selections and values are selection tuples for scoring
483  precision. "All" is automatically made as well
484  """
485  try:
486  from mpi4py import MPI
487  self.comm = MPI.COMM_WORLD
488  self.rank = self.comm.Get_rank()
489  self.number_of_processes = self.comm.size
490  except ImportError:
491  self.number_of_processes = 1
492  self.rank = 0
493 
494  self.styles = ['pairwise_rmsd','pairwise_drmsd_k','pairwise_drmsd_Q',
495  'pairwise_drms_k','pairwise_rmsd','drmsd_from_center']
496  self.style = 'pairwise_drmsd_k'
497  self.structures_dictionary = {}
498  self.reference_structures_dictionary = {}
499  self.prots = []
500  self.protein_names = None
501  self.len_particles_resolution_one = None
502  self.model = model
503  self.rmf_names_frames = {}
504  self.reference_rmf_names_frames = None
505  self.reference_structure = None
506  self.reference_prot = None
507  self.selection_dictionary = selection_dictionary
508  self.threshold = 40.0
509  self.residue_particle_index_map = None
510  self.prots = None
511  if resolution in [1,10]:
512  self.resolution = resolution
513  else:
514  raise KeyError("Currently only allow resolution 1 or 10")
515 
516  def _get_structure(self,rmf_frame_index,rmf_name):
517  """Read an RMF file and return the particles"""
518  rh = RMF.open_rmf_file_read_only(rmf_name)
519  if not self.prots:
520  self.prots = IMP.rmf.create_hierarchies(rh, self.model)
521  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
522  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
523  else:
524  IMP.rmf.link_hierarchies(rh, self.prots)
525  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
526  print("linking coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
527  del rh
528 
529  if self.resolution==1:
530  particle_dict = get_particles_at_resolution_one(self.prots[0])
531  elif self.resolution==10:
532  particle_dict = get_particles_at_resolution_ten(self.prots[0])
533 
534  protein_names = list(particle_dict.keys())
535  particles_resolution_one = []
536  for k in particle_dict:
537  particles_resolution_one += (particle_dict[k])
538 
539  if self.protein_names==None:
540  self.protein_names = protein_names
541  else:
542  if self.protein_names!=protein_names:
543  print("Error: the protein names of the new coordinate set is not compatible with the previous one")
544 
545  if self.len_particles_resolution_one==None:
546  self.len_particles_resolution_one = len(particles_resolution_one)
547  else:
548  if self.len_particles_resolution_one!=len(particles_resolution_one):
549  raise ValueError("the new coordinate set is not compatible with the previous one")
550 
551  return particles_resolution_one
552 
553  def add_structure(self,
554  rmf_name,
555  rmf_frame_index,
556  structure_set_name,
557  setup_index_map=False):
558  """ Read a structure into the ensemble and store (as coordinates).
559  @param rmf_name The name of the RMF file
560  @param rmf_frame_index The frame to read
561  @param structure_set_name Name for the set that includes this structure
562  (e.g. "cluster 1")
563  @param setup_index_map if requested, set up a dictionary to help
564  find residue indexes
565  """
566 
567  # decide where to put this structure
568  if structure_set_name in self.structures_dictionary:
569  cdict = self.structures_dictionary[structure_set_name]
570  rmflist = self.rmf_names_frames[structure_set_name]
571  else:
572  self.structures_dictionary[structure_set_name]={}
573  self.rmf_names_frames[structure_set_name]=[]
574  cdict = self.structures_dictionary[structure_set_name]
575  rmflist = self.rmf_names_frames[structure_set_name]
576 
577  # read the particles
578  try:
579  particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
580  except ValueError:
581  print("something wrong with the rmf")
582  return 0
583  self.selection_dictionary.update({"All":self.protein_names})
584 
585  for selection_name in self.selection_dictionary:
586  selection_tuple = self.selection_dictionary[selection_name]
587  coords = self._select_coordinates(selection_tuple,particles_resolution_one,self.prots[0])
588  if selection_name not in cdict:
589  cdict[selection_name] = [coords]
590  else:
591  cdict[selection_name].append(coords)
592 
593  rmflist.append((rmf_name,rmf_frame_index))
594 
595  # if requested, set up a dictionary to help find residue indexes
596  if setup_index_map:
597  self.residue_particle_index_map = {}
598  for prot_name in self.protein_names:
599  self.residue_particle_index_map[prot_name] = \
600  self._get_residue_particle_index_map(
601  prot_name,
602  particles_resolution_one,self.prots[0])
603 
604  def add_structures(self,
605  rmf_name_frame_tuples,
606  structure_set_name):
607  """Read a list of RMFs, supports parallel
608  @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
609  @param structure_set_name Name this set of structures (e.g. "cluster.1")
610  """
611 
612  # split up the requested list to read in parallel
613  my_rmf_name_frame_tuples=IMP.pmi.tools.chunk_list_into_segments(
614  rmf_name_frame_tuples,self.number_of_processes)[self.rank]
615  for nfr,tup in enumerate(my_rmf_name_frame_tuples):
616  rmf_name=tup[0]
617  rmf_frame_index=tup[1]
618  # the first frame stores the map between residues and particles
619  if self.residue_particle_index_map is None:
620  setup_index_map = True
621  else:
622  setup_index_map = False
623  self.add_structure(rmf_name,
624  rmf_frame_index,
625  structure_set_name,
626  setup_index_map)
627 
628  # synchronize the structures
629  if self.number_of_processes > 1:
630  self.rmf_names_frames=IMP.pmi.tools.scatter_and_gather(self.rmf_names_frames)
631  if self.rank != 0:
632  self.comm.send(self.structures_dictionary, dest=0, tag=11)
633  elif self.rank == 0:
634  for i in range(1, self.number_of_processes):
635  data_tmp = self.comm.recv(source=i, tag=11)
636  for key in self.structures_dictionary:
637  self.structures_dictionary[key].update(data_tmp[key])
638  for i in range(1, self.number_of_processes):
639  self.comm.send(self.structures_dictionary, dest=i, tag=11)
640  if self.rank != 0:
641  self.structures_dictionary = self.comm.recv(source=0, tag=11)
642 
643  def _get_residue_particle_index_map(self,prot_name,structure,hier):
644  # Creates map from all particles to residue numbers
645  residue_particle_index_map = []
646  if IMP.pmi.get_is_canonical(hier):
647  s = IMP.atom.Selection(hier,molecules=[prot_name],
648  resolution=1)
649  else:
650  s = IMP.atom.Selection(hier,molecules=[prot_name])
651  all_selected_particles = s.get_selected_particles()
652  intersection = list(set(all_selected_particles) & set(structure))
653  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
654  for p in sorted_intersection:
655  residue_particle_index_map.append(IMP.pmi.tools.get_residue_indexes(p))
656  return residue_particle_index_map
657 
658 
659  def _select_coordinates(self,tuple_selections,structure,prot):
660  selected_coordinates=[]
661  for t in tuple_selections:
662  if type(t)==tuple and len(t)==3:
663  if IMP.pmi.get_is_canonical(prot):
664  s = IMP.atom.Selection(prot,molecules=[t[2]],residue_indexes=range(t[0],t[1]+1),
665  resolution=1)
666  else:
667  s = IMP.atom.Selection(prot,molecules=[t[2]],residue_indexes=range(t[0],t[1]+1))
668  all_selected_particles = s.get_selected_particles()
669  intersection = list(set(all_selected_particles) & set(structure))
670  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
671  cc = [tuple(IMP.core.XYZ(p).get_coordinates()) for p in sorted_intersection]
672  selected_coordinates += cc
673  elif type(t)==str:
674  if IMP.pmi.get_is_canonical(prot):
675  s = IMP.atom.Selection(prot,molecules=[t],resolution=1)
676  else:
677  s = IMP.atom.Selection(prot,molecules=[t])
678  all_selected_particles = s.get_selected_particles()
679  intersection = list(set(all_selected_particles) & set(structure))
680  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
681  cc = [tuple(IMP.core.XYZ(p).get_coordinates()) for p in sorted_intersection]
682  selected_coordinates += cc
683  else:
684  raise ValueError("Selection error")
685  return selected_coordinates
686 
687  def set_threshold(self,threshold):
688  self.threshold = threshold
689 
690  def _get_distance(self,
691  structure_set_name1,
692  structure_set_name2,
693  selection_name,
694  index1,
695  index2):
696  """ Compute distance between structures with various metrics """
697  c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
698  c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
699  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
700  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
701 
702  if self.style=='pairwise_drmsd_k':
703  distance=IMP.atom.get_drmsd(coordinates1,coordinates2)
704  if self.style=='pairwise_drms_k':
705  distance=IMP.atom.get_drms(coordinates1,coordinates2)
706  if self.style=='pairwise_drmsd_Q':
707  distance=IMP.atom.get_drmsd_Q(coordinates1,coordinates2,self.threshold)
708 
709  if self.style=='pairwise_rmsd':
710  distance=IMP.algebra.get_rmsd(coordinates1,coordinates2)
711  return distance
712 
713  def _get_particle_distances(self,structure_set_name1,structure_set_name2,
714  selection_name,index1,index2):
715  c1 = self.structures_dictionary[structure_set_name1][selection_name][index1]
716  c2 = self.structures_dictionary[structure_set_name2][selection_name][index2]
717 
718  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
719  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
720 
721  distances=[np.linalg.norm(a-b) for (a,b) in zip(coordinates1,coordinates2)]
722 
723  return distances
724 
725  def get_precision(self,
726  structure_set_name1,
727  structure_set_name2,
728  outfile=None,
729  skip=1,
730  selection_keywords=None):
731  """ Evaluate the precision of two named structure groups. Supports MPI.
732  When the structure_set_name1 is different from the structure_set_name2,
733  this evaluates the cross-precision (average pairwise distances).
734  @param outfile Name of the precision output file
735  @param structure_set_name1 string name of the first structure set
736  @param structure_set_name2 string name of the second structure set
737  @param skip analyze every (skip) structure for the distance matrix calculation
738  @param selection_keywords Specify the selection name you want to calculate on.
739  By default this is computed for everything you provided in the constructor,
740  plus all the subunits together.
741  """
742  if selection_keywords is None:
743  sel_keys = list(self.selection_dictionary.keys())
744  else:
745  for k in selection_keywords:
746  if k not in self.selection_dictionary:
747  raise KeyError("you are trying to find named selection " \
748  + k + " which was not requested in the constructor")
749  sel_keys = selection_keywords
750 
751  if outfile is not None:
752  of = open(outfile,"w")
753  centroid_index = 0
754  for selection_name in sel_keys:
755  number_of_structures_1 = len(self.structures_dictionary[structure_set_name1][selection_name])
756  number_of_structures_2 = len(self.structures_dictionary[structure_set_name2][selection_name])
757  distances={}
758  structure_pointers_1 = list(range(0,number_of_structures_1,skip))
759  structure_pointers_2 = list(range(0,number_of_structures_2,skip))
760  pair_combination_list = list(itertools.product(structure_pointers_1,structure_pointers_2))
761  if len(pair_combination_list)==0:
762  raise ValueError("no structure selected. Check the skip parameter.")
763 
764  # compute pairwise distances in parallel
765  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
766  pair_combination_list,self.number_of_processes)[self.rank]
767  my_length = len(my_pair_combination_list)
768  for n,pair in enumerate(my_pair_combination_list):
769  progression = int(float(n)/my_length*100.0)
770  distances[pair] = self._get_distance(structure_set_name1,structure_set_name2,
771  selection_name,pair[0],pair[1])
772  if self.number_of_processes > 1:
773  distances = IMP.pmi.tools.scatter_and_gather(distances)
774 
775  # Finally compute distance to centroid
776  if self.rank == 0:
777  if structure_set_name1==structure_set_name2:
778  structure_pointers = structure_pointers_1
779  number_of_structures = number_of_structures_1
780 
781  # calculate the distance from the first centroid
782  # and determine the centroid
783  distance = 0.0
784  distances_to_structure = {}
785  distances_to_structure_normalization = {}
786 
787  for n in structure_pointers:
788  distances_to_structure[n] = 0.0
789  distances_to_structure_normalization[n]=0
790 
791  for k in distances:
792  distance += distances[k]
793  distances_to_structure[k[0]] += distances[k]
794  distances_to_structure[k[1]] += distances[k]
795  distances_to_structure_normalization[k[0]] += 1
796  distances_to_structure_normalization[k[1]] += 1
797 
798  for n in structure_pointers:
799  distances_to_structure[n] = distances_to_structure[n]/distances_to_structure_normalization[n]
800 
801  min_distance = min([distances_to_structure[n] for n in distances_to_structure])
802  centroid_index = [k for k, v in distances_to_structure.items() if v == min_distance][0]
803  centroid_rmf_name = self.rmf_names_frames[structure_set_name1][centroid_index]
804 
805  centroid_distance = 0.0
806  distance_list = []
807  for n in range(number_of_structures):
808  dist = self._get_distance(structure_set_name1,structure_set_name1,
809  selection_name,centroid_index,n)
810  centroid_distance += dist
811  distance_list.append(dist)
812 
813  #pairwise_distance=distance/len(distances.keys())
814  centroid_distance /= number_of_structures
815  #average_centroid_distance=sum(distances_to_structure)/len(distances_to_structure)
816  if outfile is not None:
817  of.write(str(selection_name)+" "+structure_set_name1+
818  " average centroid distance "+str(centroid_distance)+"\n")
819  of.write(str(selection_name)+" "+structure_set_name1+
820  " centroid index "+str(centroid_index)+"\n")
821  of.write(str(selection_name)+" "+structure_set_name1+
822  " centroid rmf name "+str(centroid_rmf_name)+"\n")
823  of.write(str(selection_name)+" "+structure_set_name1+
824  " median centroid distance "+str(np.median(distance_list))+"\n")
825 
826  average_pairwise_distances=sum(distances.values())/len(list(distances.values()))
827  if outfile is not None:
828  of.write(str(selection_name)+" "+structure_set_name1+" "+structure_set_name2+
829  " average pairwise distance "+str(average_pairwise_distances)+"\n")
830  if outfile is not None:
831  of.close()
832  return centroid_index
833 
834  def get_rmsf(self,
835  structure_set_name,
836  outdir="./",
837  skip=1,
838  set_plot_yaxis_range=None):
839  """ Calculate the residue mean square fluctuations (RMSF).
840  Automatically outputs as data file and pdf
841  @param structure_set_name Which structure set to calculate RMSF for
842  @param outdir Where to write the files
843  @param skip Skip this number of structures
844  @param set_plot_yaxis_range In case you need to change the plot
845  """
846  # get the centroid structure for the whole complex
847  centroid_index = self.get_precision(structure_set_name,
848  structure_set_name,
849  outfile=None,
850  skip=skip)
851  if self.rank==0:
852  for sel_name in self.protein_names:
853  self.selection_dictionary.update({sel_name:[sel_name]})
854  try:
855  number_of_structures = len(self.structures_dictionary[structure_set_name][sel_name])
856  except KeyError:
857  # that protein was not included in the selection
858  continue
859  rpim = self.residue_particle_index_map[sel_name]
860  outfile = outdir+"/rmsf."+sel_name+".dat"
861  of = open(outfile,"w")
862  residue_distances = {}
863  residue_nblock = {}
864  for index in range(number_of_structures):
865  distances = self._get_particle_distances(structure_set_name,
866  structure_set_name,
867  sel_name,
868  centroid_index,index)
869  for nblock,block in enumerate(rpim):
870  for residue_number in block:
871  residue_nblock[residue_number] = nblock
872  if residue_number not in residue_distances:
873  residue_distances[residue_number] = [distances[nblock]]
874  else:
875  residue_distances[residue_number].append(distances[nblock])
876 
877  residues = []
878  rmsfs = []
879  for rn in residue_distances:
880  residues.append(rn)
881  rmsf = np.std(residue_distances[rn])
882  rmsfs.append(rmsf)
883  of.write(str(rn)+" "+str(residue_nblock[rn])+" "+str(rmsf)+"\n")
884 
885  IMP.pmi.output.plot_xy_data(residues,rmsfs,title=sel_name,
886  out_fn=outdir+"/rmsf."+sel_name,display=False,
887  set_plot_yaxis_range=set_plot_yaxis_range,
888  xlabel='Residue Number',ylabel='Standard error')
889  of.close()
890 
891 
892  def set_reference_structure(self,rmf_name,rmf_frame_index):
893  """Read in a structure used for reference computation.
894  Needed before calling get_average_distance_wrt_reference_structure()
895  @param rmf_name The RMF file to read the reference
896  @param rmf_frame_index The index in that file
897  """
898  particles_resolution_one = self._get_structure(rmf_frame_index,rmf_name)
899  self.reference_rmf_names_frames = (rmf_name,rmf_frame_index)
900 
901  for selection_name in self.selection_dictionary:
902  selection_tuple = self.selection_dictionary[selection_name]
903  coords = self._select_coordinates(selection_tuple,
904  particles_resolution_one,self.prots[0])
905  self.reference_structures_dictionary[selection_name] = coords
906 
907  def get_rmsd_wrt_reference_structure_with_alignment(self,structure_set_name,alignment_selection_key):
908  """First align then calculate RMSD
909  @param structure_set_name: the name of the structure set
910  @param alignment_selection: the key containing the selection tuples needed to make the alignment stored in self.selection_dictionary
911  @return: for each structure in the structure set, returns the rmsd
912  """
913  if self.reference_structures_dictionary=={}:
914  print("Cannot compute until you set a reference structure")
915  return
916 
917  align_reference_coordinates = self.reference_structures_dictionary[alignment_selection_key]
918  align_coordinates = self.structures_dictionary[structure_set_name][alignment_selection_key]
919  transformations = []
920  for c in align_coordinates:
921  Ali = IMP.pmi.analysis.Alignment({"All":align_reference_coordinates}, {"All":c})
922  transformation = Ali.align()[1]
923  transformations.append(transformation)
924  for selection_name in self.selection_dictionary:
925  reference_coordinates = self.reference_structures_dictionary[selection_name]
926  coordinates2 = [IMP.algebra.Vector3D(c) for c in reference_coordinates]
927  distances = []
928  for n,sc in enumerate(self.structures_dictionary[structure_set_name][selection_name]):
929  coordinates1 = [transformations[n].get_transformed(IMP.algebra.Vector3D(c)) for c in sc]
930  distance = IMP.algebra.get_rmsd(coordinates1,coordinates2)
931  distances.append(distance)
932  print(selection_name,"average rmsd",sum(distances)/len(distances),"median",self._get_median(distances),"minimum distance",min(distances))
933 
934  def _get_median(self,list_of_values):
935  return np.median(np.array(list_of_values))
936 
937  def get_average_distance_wrt_reference_structure(self,structure_set_name):
938  """Compare the structure set to the reference structure.
939  @param structure_set_name The structure set to compute this on
940  @note First call set_reference_structure()
941  """
942  ret = {}
943  if self.reference_structures_dictionary=={}:
944  print("Cannot compute until you set a reference structure")
945  return
946  for selection_name in self.selection_dictionary:
947  reference_coordinates = self.reference_structures_dictionary[selection_name]
948  coordinates2 = [IMP.algebra.Vector3D(c) for c in reference_coordinates]
949  distances = []
950  for sc in self.structures_dictionary[structure_set_name][selection_name]:
951  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
952  if self.style=='pairwise_drmsd_k':
953  distance = IMP.atom.get_drmsd(coordinates1,coordinates2)
954  if self.style=='pairwise_drms_k':
955  distance = IMP.atom.get_drms(coordinates1,coordinates2)
956  if self.style=='pairwise_drmsd_Q':
957  distance = IMP.atom.get_drmsd_Q(coordinates1,coordinates2,self.threshold)
958  if self.style=='pairwise_rmsd':
959  distance = IMP.algebra.get_rmsd(coordinates1,coordinates2)
960  distances.append(distance)
961 
962  print(selection_name,"average distance",sum(distances)/len(distances),"minimum distance",min(distances),'nframes',len(distances))
963  ret[selection_name] = {'average_distance':sum(distances)/len(distances),'minimum_distance':min(distances)}
964  return ret
965 
966  def get_coordinates(self):
967  pass
968 
969  def set_precision_style(self, style):
970  if style in self.styles:
971  self.style=style
972  else:
973  raise ValueError("No such style")
974 
975 
976 class GetModelDensity(object):
977  """Compute mean density maps from structures.
978 
979  Keeps a dictionary of density maps,
980  keys are in the custom ranges. When you call add_subunits_density, it adds
981  particle coordinates to the existing density maps.
982  """
983 
984  def __init__(self, custom_ranges, representation=None, resolution=20.0, voxel=5.0):
985  """Constructor.
986  @param custom_ranges Required. It's a dictionary, keys are the
987  density component names, values are selection tuples
988  e.g. {'kin28':[['kin28',1,-1]],
989  'density_name_1' :[('ccl1')],
990  'density_name_2' :[(1,142,'tfb3d1'),
991  (143,700,'tfb3d2')],
992  @param representation PMI representation, for doing selections.
993  Not needed if you only pass hierarchies
994  @param resolution The MRC resolution of the output map (in Angstrom unit)
995  @param voxel The voxel size for the output map (lower is slower)
996  """
997 
998  self.representation = representation
999  self.MRCresolution = resolution
1000  self.voxel = voxel
1001  self.densities = {}
1002  self.count_models = 0.0
1003  self.custom_ranges = custom_ranges
1004 
1005  def add_subunits_density(self, hierarchy=None):
1006  """Add a frame to the densities.
1007  @param hierarchy Optionally read the hierarchy from somewhere.
1008  If not passed, will just read the representation.
1009  """
1010  self.count_models += 1.0
1011 
1012  if hierarchy:
1013  part_dict = get_particles_at_resolution_one(hierarchy)
1014  all_particles_by_resolution = []
1015  for name in part_dict:
1016  all_particles_by_resolution += part_dict[name]
1017 
1018  for density_name in self.custom_ranges:
1019  parts = []
1020  if hierarchy:
1021  all_particles_by_segments = []
1022 
1023  for seg in self.custom_ranges[density_name]:
1024  if not hierarchy:
1025  # when you have a IMP.pmi.representation.Representation class
1026  parts += IMP.tools.select_by_tuple(self.representation,
1027  seg, resolution=1, name_is_ambiguous=False)
1028  else:
1029  # else, when you have a hierarchy, but not a representation
1030  if not IMP.pmi.get_is_canonical(hierarchy):
1031  for h in hierarchy.get_children():
1033  IMP.atom.Molecule.setup_particle(h.get_particle())
1034 
1035  if type(seg) == str:
1036  s = IMP.atom.Selection(hierarchy,molecule=seg)
1037  elif type(seg) == tuple:
1038  s = IMP.atom.Selection(
1039  hierarchy, molecule=seg[2],residue_indexes=range(seg[0], seg[1] + 1))
1040  else:
1041  raise Exception('could not understand selection tuple '+str(seg))
1042 
1043  all_particles_by_segments += s.get_selected_particles()
1044  if hierarchy:
1045  if IMP.pmi.get_is_canonical(hierarchy):
1046  parts = all_particles_by_segments
1047  else:
1048  parts = list(
1049  set(all_particles_by_segments) & set(all_particles_by_resolution))
1050  self._create_density_from_particles(parts, density_name)
1051 
1052  def normalize_density(self):
1053  pass
1054 
1055  def _create_density_from_particles(self, ps, name,
1056  kernel_type='GAUSSIAN'):
1057  '''Internal function for adding to densities.
1058  pass XYZR particles with mass and create a density from them.
1059  kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.'''
1060  kd = {
1061  'GAUSSIAN': IMP.em.GAUSSIAN,
1062  'BINARIZED_SPHERE': IMP.em.BINARIZED_SPHERE,
1063  'SPHERE': IMP.em.SPHERE}
1064 
1065  dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
1066  dmap.calcRMS()
1067  dmap.set_was_used(True)
1068  if name not in self.densities:
1069  self.densities[name] = dmap
1070  else:
1071  bbox1 = IMP.em.get_bounding_box(self.densities[name])
1072  bbox2 = IMP.em.get_bounding_box(dmap)
1073  bbox1 += bbox2
1074  dmap3 = IMP.em.create_density_map(bbox1,self.voxel)
1075  dmap3.set_was_used(True)
1076  dmap3.add(dmap)
1077  dmap3.add(self.densities[name])
1078  self.densities[name] = dmap3
1079 
1080  def get_density_keys(self):
1081  return list(self.densities.keys())
1082 
1083  def get_density(self,name):
1084  """Get the current density for some component name"""
1085  if name not in self.densities:
1086  return None
1087  else:
1088  return self.densities[name]
1089 
1090  def write_mrc(self, path="./"):
1091  for density_name in self.densities:
1092  self.densities[density_name].multiply(1. / self.count_models)
1093  IMP.em.write_map(
1094  self.densities[density_name],
1095  path + "/" + density_name + ".mrc",
1097 
1098 
1099 class GetContactMap(object):
1100 
1101  def __init__(self, distance=15.):
1102  self.distance = distance
1103  self.contactmap = ''
1104  self.namelist = []
1105  self.xlinks = 0
1106  self.XL = {}
1107  self.expanded = {}
1108  self.resmap = {}
1109 
1110  def set_prot(self, prot):
1111  from scipy.spatial.distance import cdist
1112  self.prot = prot
1113  self.protnames = []
1114  coords = []
1115  radii = []
1116  namelist = []
1117 
1118  particles_dictionary = get_particles_at_resolution_one(self.prot)
1119 
1120  for name in particles_dictionary:
1121  residue_indexes = []
1122  for p in particles_dictionary[name]:
1123  print(p.get_name())
1124  residue_indexes += IMP.pmi.tools.get_residue_indexes(p)
1125  #residue_indexes.add( )
1126 
1127  if len(residue_indexes) != 0:
1128  self.protnames.append(name)
1129  for res in range(min(residue_indexes), max(residue_indexes) + 1):
1130  d = IMP.core.XYZR(p)
1131  new_name = name + ":" + str(res)
1132  if name not in self.resmap:
1133  self.resmap[name] = {}
1134  if res not in self.resmap:
1135  self.resmap[name][res] = {}
1136 
1137  self.resmap[name][res] = new_name
1138  namelist.append(new_name)
1139 
1140  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1141  coords.append(crd)
1142  radii.append(d.get_radius())
1143 
1144  coords = np.array(coords)
1145  radii = np.array(radii)
1146 
1147  if len(self.namelist) == 0:
1148  self.namelist = namelist
1149  self.contactmap = np.zeros((len(coords), len(coords)))
1150 
1151  distances = cdist(coords, coords)
1152  distances = (distances - radii).T - radii
1153  distances = distances <= self.distance
1154 
1155  print(coords)
1156  print(radii)
1157  print(distances)
1158 
1159  self.contactmap += distances
1160 
1161  def get_subunit_coords(self, frame, align=0):
1162  from scipy.spatial.distance import cdist
1163  coords = []
1164  radii = []
1165  namelist = []
1166  test, testr = [], []
1167  for part in self.prot.get_children():
1168  SortedSegments = []
1169  print(part)
1170  for chl in part.get_children():
1171  start = IMP.atom.get_leaves(chl)[0]
1172  end = IMP.atom.get_leaves(chl)[-1]
1173 
1174  startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
1175  endres = IMP.atom.Fragment(end).get_residue_indexes()[-1]
1176  SortedSegments.append((chl, startres))
1177  SortedSegments = sorted(SortedSegments, key=itemgetter(1))
1178 
1179  for sgmnt in SortedSegments:
1180  for leaf in IMP.atom.get_leaves(sgmnt[0]):
1181  p = IMP.core.XYZR(leaf)
1182  crd = np.array([p.get_x(), p.get_y(), p.get_z()])
1183 
1184  coords.append(crd)
1185  radii.append(p.get_radius())
1186 
1187  new_name = part.get_name() + '_' + sgmnt[0].get_name() +\
1188  '_' + \
1189  str(IMP.atom.Fragment(leaf)
1190  .get_residue_indexes()[0])
1191  namelist.append(new_name)
1192  self.expanded[new_name] = len(
1193  IMP.atom.Fragment(leaf).get_residue_indexes())
1194  if part.get_name() not in self.resmap:
1195  self.resmap[part.get_name()] = {}
1196  for res in IMP.atom.Fragment(leaf).get_residue_indexes():
1197  self.resmap[part.get_name()][res] = new_name
1198 
1199  coords = np.array(coords)
1200  radii = np.array(radii)
1201  if len(self.namelist) == 0:
1202  self.namelist = namelist
1203  self.contactmap = np.zeros((len(coords), len(coords)))
1204  distances = cdist(coords, coords)
1205  distances = (distances - radii).T - radii
1206  distances = distances <= self.distance
1207  self.contactmap += distances
1208 
1209  def add_xlinks(
1210  self,
1211  filname,
1212  identification_string='ISDCrossLinkMS_Distance_'):
1213  # 'ISDCrossLinkMS_Distance_interrb_6629-State:0-20:RPS30_218:eIF3j-1-1-0.1_None'
1214  self.xlinks = 1
1215  data = open(filname)
1216  D = data.readlines()
1217  data.close()
1218 
1219  for d in D:
1220  if identification_string in d:
1221  d = d.replace(
1222  "_",
1223  " ").replace("-",
1224  " ").replace(":",
1225  " ").split()
1226 
1227  t1, t2 = (d[0], d[1]), (d[1], d[0])
1228  if t1 not in self.XL:
1229  self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
1230  self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
1231  else:
1232  self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
1233  self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1234 
1235  def dist_matrix(self, skip_cmap=0, skip_xl=1):
1236  K = self.namelist
1237  M = self.contactmap
1238  C, R = [], []
1239  L = sum(self.expanded.values())
1240  proteins = self.protnames
1241 
1242  # exp new
1243  if skip_cmap == 0:
1244  Matrices = {}
1245  proteins = [p.get_name() for p in self.prot.get_children()]
1246  missing = []
1247  for p1 in range(len(proteins)):
1248  for p2 in range(p1, len(proteins)):
1249  pl1, pl2 = max(
1250  self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
1251  pn1, pn2 = proteins[p1], proteins[p2]
1252  mtr = np.zeros((pl1 + 1, pl2 + 1))
1253  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1254  for i1 in range(1, pl1 + 1):
1255  for i2 in range(1, pl2 + 1):
1256  try:
1257  r1 = K.index(self.resmap[pn1][i1])
1258  r2 = K.index(self.resmap[pn2][i2])
1259  r = M[r1, r2]
1260  mtr[i1 - 1, i2 - 1] = r
1261  except KeyError:
1262  missing.append((pn1, pn2, i1, i2))
1263  pass
1264  Matrices[(pn1, pn2)] = mtr
1265 
1266  # add cross-links
1267  if skip_xl == 0:
1268  if self.XL == {}:
1269  raise ValueError("cross-links were not provided, use add_xlinks function!")
1270  Matrices_xl = {}
1271  missing_xl = []
1272  for p1 in range(len(proteins)):
1273  for p2 in range(p1, len(proteins)):
1274  pl1, pl2 = max(
1275  self.resmap[proteins[p1]].keys()), max(self.resmap[proteins[p2]].keys())
1276  pn1, pn2 = proteins[p1], proteins[p2]
1277  mtr = np.zeros((pl1 + 1, pl2 + 1))
1278  flg = 0
1279  try:
1280  xls = self.XL[(pn1, pn2)]
1281  except KeyError:
1282  try:
1283  xls = self.XL[(pn2, pn1)]
1284  flg = 1
1285  except KeyError:
1286  flg = 2
1287  if flg == 0:
1288  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1289  for xl1, xl2 in xls:
1290  if xl1 > pl1:
1291  print('X' * 10, xl1, xl2)
1292  xl1 = pl1
1293  if xl2 > pl2:
1294  print('X' * 10, xl1, xl2)
1295  xl2 = pl2
1296  mtr[xl1 - 1, xl2 - 1] = 100
1297  elif flg == 1:
1298  print('Creating matrix for: ', p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1299  for xl1, xl2 in xls:
1300  if xl1 > pl1:
1301  print('X' * 10, xl1, xl2)
1302  xl1 = pl1
1303  if xl2 > pl2:
1304  print('X' * 10, xl1, xl2)
1305  xl2 = pl2
1306  mtr[xl2 - 1, xl1 - 1] = 100
1307  else:
1308  raise RuntimeError('WTF!')
1309  Matrices_xl[(pn1, pn2)] = mtr
1310 
1311  # expand the matrix to individual residues
1312  #NewM = []
1313  # for x1 in xrange(len(K)):
1314  # lst = []
1315  # for x2 in xrange(len(K)):
1316  # lst += [M[x1,x2]]*self.expanded[K[x2]]
1317  # for i in xrange(self.expanded[K[x1]]): NewM.append(np.array(lst))
1318  #NewM = np.array(NewM)
1319 
1320  # make list of protein names and create coordinate lists
1321  C = proteins
1322  # W is the component length list,
1323  # R is the contiguous coordinates list
1324  W, R = [], []
1325  for i, c in enumerate(C):
1326  cl = max(self.resmap[c].keys())
1327  W.append(cl)
1328  if i == 0:
1329  R.append(cl)
1330  else:
1331  R.append(R[-1] + cl)
1332 
1333  # start plotting
1334  if filename:
1335  # Don't require a display
1336  import matplotlib as mpl
1337  mpl.use('Agg')
1338  import matplotlib.pyplot as plt
1339  import matplotlib.gridspec as gridspec
1340  import scipy.sparse as sparse
1341 
1342  f = plt.figure()
1343  gs = gridspec.GridSpec(len(W), len(W),
1344  width_ratios=W,
1345  height_ratios=W)
1346 
1347  cnt = 0
1348  for x1, r1 in enumerate(R):
1349  if x1 == 0:
1350  s1 = 0
1351  else:
1352  s1 = R[x1 - 1]
1353  for x2, r2 in enumerate(R):
1354  if x2 == 0:
1355  s2 = 0
1356  else:
1357  s2 = R[x2 - 1]
1358 
1359  ax = plt.subplot(gs[cnt])
1360  if skip_cmap == 0:
1361  try:
1362  mtr = Matrices[(C[x1], C[x2])]
1363  except KeyError:
1364  mtr = Matrices[(C[x2], C[x1])].T
1365  #cax = ax.imshow(log(NewM[s1:r1,s2:r2] / 1.), interpolation='nearest', vmin=0., vmax=log(NewM.max()))
1366  cax = ax.imshow(
1367  log(mtr),
1368  interpolation='nearest',
1369  vmin=0.,
1370  vmax=log(NewM.max()))
1371  ax.set_xticks([])
1372  ax.set_yticks([])
1373  if skip_xl == 0:
1374  try:
1375  mtr = Matrices_xl[(C[x1], C[x2])]
1376  except KeyError:
1377  mtr = Matrices_xl[(C[x2], C[x1])].T
1378  cax = ax.spy(
1379  sparse.csr_matrix(mtr),
1380  markersize=10,
1381  color='white',
1382  linewidth=100,
1383  alpha=0.5)
1384  ax.set_xticks([])
1385  ax.set_yticks([])
1386 
1387  cnt += 1
1388  if x2 == 0:
1389  ax.set_ylabel(C[x1], rotation=90)
1390  plt.show()
1391 
1392 
1393 # ------------------------------------------------------------------
1394 # a few random tools
1395 
1396 def get_hiers_from_rmf(model, frame_number, rmf_file):
1397  # I have to deprecate this function
1398  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1399 
1400  # load the frame
1401  rh = RMF.open_rmf_file_read_only(rmf_file)
1402 
1403  try:
1404  prots = IMP.rmf.create_hierarchies(rh, model)
1405  except IOError:
1406  print("Unable to open rmf file %s" % (rmf_file))
1407  return None
1408  #IMP.rmf.link_hierarchies(rh, prots)
1409 
1410  try:
1411  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1412  except IOError:
1413  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1414  return None
1415  model.update()
1416  del rh
1417 
1418  return prots
1419 
1420 def link_hiers_to_rmf(model,hiers,frame_number, rmf_file):
1421  print("linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
1422  rh = RMF.open_rmf_file_read_only(rmf_file)
1423  IMP.rmf.link_hierarchies(rh, hiers)
1424  try:
1425  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1426  except:
1427  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1428  return False
1429  model.update()
1430  del rh
1431  return True
1432 
1433 
1434 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
1435  # I have to deprecate this function
1436  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1437 
1438  # load the frame
1439  rh = RMF.open_rmf_file_read_only(rmf_file)
1440 
1441  try:
1442  prots = IMP.rmf.create_hierarchies(rh, model)
1443  rs = IMP.rmf.create_restraints(rh, model)
1444  except:
1445  print("Unable to open rmf file %s" % (rmf_file))
1446  return None,None
1447  try:
1448  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1449  except:
1450  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1451  return None,None
1452  model.update()
1453  del rh
1454 
1455  return prots,rs
1456 
1457 def link_hiers_and_restraints_to_rmf(model,hiers,rs, frame_number, rmf_file):
1458  print("linking hierarchies for frame %i rmf file %s" % (frame_number, rmf_file))
1459  rh = RMF.open_rmf_file_read_only(rmf_file)
1460  IMP.rmf.link_hierarchies(rh, hiers)
1461  IMP.rmf.link_restraints(rh, rs)
1462  try:
1463  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1464  except:
1465  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1466  return False
1467  model.update()
1468  del rh
1469  return True
1470 
1471 
1472 def get_hiers_from_rmf(model, frame_number, rmf_file):
1473  print("getting coordinates for frame %i rmf file %s" % (frame_number, rmf_file))
1474 
1475  # load the frame
1476  rh = RMF.open_rmf_file_read_only(rmf_file)
1477 
1478  try:
1479  prots = IMP.rmf.create_hierarchies(rh, model)
1480  except:
1481  print("Unable to open rmf file %s" % (rmf_file))
1482  prot = None
1483  return prot
1484  #IMP.rmf.link_hierarchies(rh, prots)
1485  try:
1486  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1487  except:
1488  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1489  prots = None
1490  model.update()
1491  del rh
1492  return prots
1493 
1494 
1496  """Get particles at res 1, or any beads, based on the name.
1497  No Representation is needed. This is mainly used when the hierarchy
1498  is read from an RMF file.
1499  @return a dictionary of component names and their particles
1500  \note If the root node is named "System" or is a "State", do proper selection.
1501  """
1502  particle_dict = {}
1503 
1504  # attempt to give good results for PMI2
1505  if IMP.pmi.get_is_canonical(prot):
1506  for mol in IMP.atom.get_by_type(prot,IMP.atom.MOLECULE_TYPE):
1507  sel = IMP.atom.Selection(mol,resolution=1)
1508  particle_dict[mol.get_name()] = sel.get_selected_particles()
1509  else:
1510  allparticles = []
1511  for c in prot.get_children():
1512  name = c.get_name()
1513  particle_dict[name] = IMP.atom.get_leaves(c)
1514  for s in c.get_children():
1515  if "_Res:1" in s.get_name() and "_Res:10" not in s.get_name():
1516  allparticles += IMP.atom.get_leaves(s)
1517  if "Beads" in s.get_name():
1518  allparticles += IMP.atom.get_leaves(s)
1519 
1520  particle_align = []
1521  for name in particle_dict:
1522  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1523  list(set(particle_dict[name]) & set(allparticles)))
1524  return particle_dict
1525 
1527  """Get particles at res 10, or any beads, based on the name.
1528  No Representation is needed.
1529  This is mainly used when the hierarchy is read from an RMF file.
1530  @return a dictionary of component names and their particles
1531  \note If the root node is named "System" or is a "State", do proper selection.
1532  """
1533  particle_dict = {}
1534  # attempt to give good results for PMI2
1535  if IMP.pmi.get_is_canonical(prot):
1536  for mol in IMP.atom.get_by_type(prot,IMP.atom.MOLECULE_TYPE):
1537  sel = IMP.atom.Selection(mol,resolution=10)
1538  particle_dict[mol.get_name()] = sel.get_selected_particles()
1539  else:
1540  allparticles = []
1541  for c in prot.get_children():
1542  name = c.get_name()
1543  particle_dict[name] = IMP.atom.get_leaves(c)
1544  for s in c.get_children():
1545  if "_Res:10" in s.get_name():
1546  allparticles += IMP.atom.get_leaves(s)
1547  if "Beads" in s.get_name():
1548  allparticles += IMP.atom.get_leaves(s)
1549  particle_align = []
1550  for name in particle_dict:
1551  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1552  list(set(particle_dict[name]) & set(allparticles)))
1553  return particle_dict
1554 
1555 def select_by_tuple(first_res_last_res_name_tuple):
1556  first_res = first_res_last_res_hier_tuple[0]
1557  last_res = first_res_last_res_hier_tuple[1]
1558  name = first_res_last_res_hier_tuple[2]
1559 
1560 class CrossLinkTable(object):
1561  """Visualization of crosslinks"""
1562  def __init__(self):
1563  self.crosslinks = []
1564  self.external_csv_data = None
1565  self.crosslinkedprots = set()
1566  self.mindist = +10000000.0
1567  self.maxdist = -10000000.0
1568  self.contactmap = None
1569 
1570  def set_hierarchy(self, prot):
1571  self.prot_length_dict = {}
1572  self.model=prot.get_model()
1573 
1574  for i in prot.get_children():
1575  name = i.get_name()
1576  residue_indexes = []
1577  for p in IMP.atom.get_leaves(i):
1578  residue_indexes += IMP.pmi.tools.get_residue_indexes(p)
1579 
1580  if len(residue_indexes) != 0:
1581  self.prot_length_dict[name] = max(residue_indexes)
1582 
1583  def set_coordinates_for_contact_map(self, rmf_name,rmf_frame_index):
1584  from scipy.spatial.distance import cdist
1585 
1586  rh= RMF.open_rmf_file_read_only(rmf_name)
1587  prots=IMP.rmf.create_hierarchies(rh, self.model)
1588  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
1589  print("getting coordinates for frame %i rmf file %s" % (rmf_frame_index, rmf_name))
1590  del rh
1591 
1592 
1593  coords = []
1594  radii = []
1595  namelist = []
1596 
1597  particles_dictionary = get_particles_at_resolution_one(prots[0])
1598 
1599  resindex = 0
1600  self.index_dictionary = {}
1601 
1602  for name in particles_dictionary:
1603  residue_indexes = []
1604  for p in particles_dictionary[name]:
1605  print(p.get_name())
1606  residue_indexes = IMP.pmi.tools.get_residue_indexes(p)
1607  #residue_indexes.add( )
1608 
1609  if len(residue_indexes) != 0:
1610 
1611  for res in range(min(residue_indexes), max(residue_indexes) + 1):
1612  d = IMP.core.XYZR(p)
1613 
1614  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1615  coords.append(crd)
1616  radii.append(d.get_radius())
1617  if name not in self.index_dictionary:
1618  self.index_dictionary[name] = [resindex]
1619  else:
1620  self.index_dictionary[name].append(resindex)
1621  resindex += 1
1622 
1623  coords = np.array(coords)
1624  radii = np.array(radii)
1625 
1626  distances = cdist(coords, coords)
1627  distances = (distances - radii).T - radii
1628 
1629  distances = np.where(distances <= 20.0, 1.0, 0)
1630  if self.contactmap is None:
1631  self.contactmap = np.zeros((len(coords), len(coords)))
1632  self.contactmap += distances
1633 
1634  for prot in prots: IMP.atom.destroy(prot)
1635 
1636  def set_crosslinks(
1637  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1638  mapping=None,
1639  filter_label=None,
1640  filter_rmf_file_names=None, #provide a list of rmf base names to filter the stat file
1641  external_csv_data_file=None,
1642  external_csv_data_file_unique_id_key="Unique ID"):
1643 
1644  # example key ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None
1645  # mapping is a dictionary that maps standard keywords to entry positions in the key string
1646  # confidence class is a filter that
1647  # external datafile is a datafile that contains further information on the crosslinks
1648  # it will use the unique id to create the dictionary keys
1649 
1650  po = IMP.pmi.output.ProcessOutput(data_file)
1651  keys = po.get_keys()
1652 
1653  xl_keys = [k for k in keys if search_label in k]
1654 
1655  if filter_rmf_file_names is not None:
1656  rmf_file_key="local_rmf_file_name"
1657  fs = po.get_fields(xl_keys+[rmf_file_key])
1658  else:
1659  fs = po.get_fields(xl_keys)
1660 
1661  # this dictionary stores the occurency of given crosslinks
1662  self.cross_link_frequency = {}
1663 
1664  # this dictionary stores the series of distances for given crosslinked
1665  # residues
1666  self.cross_link_distances = {}
1667 
1668  # this dictionary stores the series of distances for given crosslinked
1669  # residues
1670  self.cross_link_distances_unique = {}
1671 
1672  if not external_csv_data_file is None:
1673  # this dictionary stores the further information on crosslinks
1674  # labeled by unique ID
1675  self.external_csv_data = {}
1676  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1677 
1678  for xl in xldb:
1679  self.external_csv_data[
1680  xl[external_csv_data_file_unique_id_key]] = xl
1681 
1682  # this list keeps track the tuple of cross-links and sample
1683  # so that we don't count twice the same crosslinked residues in the
1684  # same sample
1685  cross_link_frequency_list = []
1686 
1687  self.unique_cross_link_list = []
1688 
1689  for key in xl_keys:
1690  print(key)
1691  keysplit = key.replace(
1692  "_",
1693  " ").replace(
1694  "-",
1695  " ").replace(
1696  ":",
1697  " ").split(
1698  )
1699 
1700  if filter_label!=None:
1701  if filter_label not in keysplit: continue
1702 
1703  if mapping is None:
1704  r1 = int(keysplit[5])
1705  c1 = keysplit[6]
1706  r2 = int(keysplit[7])
1707  c2 = keysplit[8]
1708  try:
1709  confidence = keysplit[12]
1710  except:
1711  confidence = '0.0'
1712  try:
1713  unique_identifier = keysplit[3]
1714  except:
1715  unique_identifier = '0'
1716  else:
1717  r1 = int(keysplit[mapping["Residue1"]])
1718  c1 = keysplit[mapping["Protein1"]]
1719  r2 = int(keysplit[mapping["Residue2"]])
1720  c2 = keysplit[mapping["Protein2"]]
1721  try:
1722  confidence = keysplit[mapping["Confidence"]]
1723  except:
1724  confidence = '0.0'
1725  try:
1726  unique_identifier = keysplit[mapping["Unique Identifier"]]
1727  except:
1728  unique_identifier = '0'
1729 
1730  self.crosslinkedprots.add(c1)
1731  self.crosslinkedprots.add(c2)
1732 
1733  # construct the list of distances corresponding to the input rmf
1734  # files
1735 
1736  dists=[]
1737  if filter_rmf_file_names is not None:
1738  for n,d in enumerate(fs[key]):
1739  if fs[rmf_file_key][n] in filter_rmf_file_names:
1740  dists.append(float(d))
1741  else:
1742  dists=[float(f) for f in fs[key]]
1743 
1744  # check if the input confidence class corresponds to the
1745  # one of the cross-link
1746 
1747  mdist = self.median(dists)
1748 
1749  stdv = np.std(np.array(dists))
1750  if self.mindist > mdist:
1751  self.mindist = mdist
1752  if self.maxdist < mdist:
1753  self.maxdist = mdist
1754 
1755  # calculate the frequency of unique crosslinks within the same
1756  # sample
1757  if not self.external_csv_data is None:
1758  sample = self.external_csv_data[unique_identifier]["Sample"]
1759  else:
1760  sample = "None"
1761 
1762  if (r1, c1, r2, c2,mdist) not in cross_link_frequency_list:
1763  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1764  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1765  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1766  else:
1767  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1768  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1769  cross_link_frequency_list.append((r1, c1, r2, c2))
1770  cross_link_frequency_list.append((r2, c2, r1, c1))
1771  self.unique_cross_link_list.append(
1772  (r1, c1, r2, c2,mdist))
1773 
1774  if (r1, c1, r2, c2) not in self.cross_link_distances:
1775  self.cross_link_distances[(
1776  r1,
1777  c1,
1778  r2,
1779  c2,
1780  mdist,
1781  confidence)] = dists
1782  self.cross_link_distances[(
1783  r2,
1784  c2,
1785  r1,
1786  c1,
1787  mdist,
1788  confidence)] = dists
1789  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1790  else:
1791  self.cross_link_distances[(
1792  r2,
1793  c2,
1794  r1,
1795  c1,
1796  mdist,
1797  confidence)] += dists
1798  self.cross_link_distances[(
1799  r1,
1800  c1,
1801  r2,
1802  c2,
1803  mdist,
1804  confidence)] += dists
1805 
1806  self.crosslinks.append(
1807  (r1,
1808  c1,
1809  r2,
1810  c2,
1811  mdist,
1812  stdv,
1813  confidence,
1814  unique_identifier,
1815  'original'))
1816  self.crosslinks.append(
1817  (r2,
1818  c2,
1819  r1,
1820  c1,
1821  mdist,
1822  stdv,
1823  confidence,
1824  unique_identifier,
1825  'reversed'))
1826 
1827  self.cross_link_frequency_inverted = {}
1828  for xl in self.unique_cross_link_list:
1829  (r1, c1, r2, c2, mdist) = xl
1830  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1831  if frequency not in self.cross_link_frequency_inverted:
1832  self.cross_link_frequency_inverted[
1833  frequency] = [(r1, c1, r2, c2)]
1834  else:
1835  self.cross_link_frequency_inverted[
1836  frequency].append((r1, c1, r2, c2))
1837 
1838  # -------------
1839 
1840  def median(self, mylist):
1841  sorts = sorted(mylist)
1842  length = len(sorts)
1843  print(length)
1844  if length == 1:
1845  return mylist[0]
1846  if not length % 2:
1847  return (sorts[length / 2] + sorts[length / 2 - 1]) / 2.0
1848  return sorts[length / 2]
1849 
1850  def set_threshold(self,threshold):
1851  self.threshold=threshold
1852 
1853  def set_tolerance(self,tolerance):
1854  self.tolerance=tolerance
1855 
1856  def colormap(self, dist):
1857  if dist < self.threshold - self.tolerance:
1858  return "Green"
1859  elif dist >= self.threshold + self.tolerance:
1860  return "Orange"
1861  else:
1862  return "Red"
1863 
1864  def write_cross_link_database(self, filename, format='csv'):
1865  import csv
1866 
1867  fieldnames = [
1868  "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
1869  "Median Distance", "Standard Deviation", "Confidence", "Frequency", "Arrangement"]
1870 
1871  if not self.external_csv_data is None:
1872  keys = list(self.external_csv_data.keys())
1873  innerkeys = list(self.external_csv_data[keys[0]].keys())
1874  innerkeys.sort()
1875  fieldnames += innerkeys
1876 
1877  dw = csv.DictWriter(
1878  open(filename,
1879  "w"),
1880  delimiter=',',
1881  fieldnames=fieldnames)
1882  dw.writeheader()
1883  for xl in self.crosslinks:
1884  (r1, c1, r2, c2, mdist, stdv, confidence,
1885  unique_identifier, descriptor) = xl
1886  if descriptor == 'original':
1887  outdict = {}
1888  outdict["Unique ID"] = unique_identifier
1889  outdict["Protein1"] = c1
1890  outdict["Protein2"] = c2
1891  outdict["Residue1"] = r1
1892  outdict["Residue2"] = r2
1893  outdict["Median Distance"] = mdist
1894  outdict["Standard Deviation"] = stdv
1895  outdict["Confidence"] = confidence
1896  outdict["Frequency"] = self.cross_link_frequency[
1897  (r1, c1, r2, c2)]
1898  if c1 == c2:
1899  arrangement = "Intra"
1900  else:
1901  arrangement = "Inter"
1902  outdict["Arrangement"] = arrangement
1903  if not self.external_csv_data is None:
1904  outdict.update(self.external_csv_data[unique_identifier])
1905 
1906  dw.writerow(outdict)
1907 
1908  def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
1909  no_confidence_info=False, filter=None, layout="whole", crosslinkedonly=False,
1910  filename=None, confidence_classes=None, alphablend=0.1, scale_symbol_size=1.0,
1911  gap_between_components=0,
1912  rename_protein_map=None):
1913  # layout can be:
1914  # "lowerdiagonal" print only the lower diagonal plot
1915  # "upperdiagonal" print only the upper diagonal plot
1916  # "whole" print all
1917  # crosslinkedonly: plot only components that have crosslinks
1918  # no_dist_info: if True will plot only the cross-links as grey spots
1919  # filter = tuple the tuple contains a keyword to be search in the database
1920  # a relationship ">","==","<"
1921  # and a value
1922  # example ("ID_Score",">",40)
1923  # scale_symbol_size rescale the symbol for the crosslink
1924  # rename_protein_map is a dictionary to rename proteins
1925 
1926  import matplotlib as mpl
1927  mpl.use('Agg')
1928  import matplotlib.pyplot as plt
1929  import matplotlib.cm as cm
1930 
1931  fig = plt.figure(figsize=(10, 10))
1932  ax = fig.add_subplot(111)
1933 
1934  ax.set_xticks([])
1935  ax.set_yticks([])
1936 
1937  # set the list of proteins on the x axis
1938  if prot_listx is None:
1939  if crosslinkedonly:
1940  prot_listx = list(self.crosslinkedprots)
1941  else:
1942  prot_listx = list(self.prot_length_dict.keys())
1943  prot_listx.sort()
1944 
1945  nresx = gap_between_components + \
1946  sum([self.prot_length_dict[name]
1947  + gap_between_components for name in prot_listx])
1948 
1949  # set the list of proteins on the y axis
1950 
1951  if prot_listy is None:
1952  if crosslinkedonly:
1953  prot_listy = list(self.crosslinkedprots)
1954  else:
1955  prot_listy = list(self.prot_length_dict.keys())
1956  prot_listy.sort()
1957 
1958  nresy = gap_between_components + \
1959  sum([self.prot_length_dict[name]
1960  + gap_between_components for name in prot_listy])
1961 
1962  # this is the residue offset for each protein
1963  resoffsetx = {}
1964  resendx = {}
1965  res = gap_between_components
1966  for prot in prot_listx:
1967  resoffsetx[prot] = res
1968  res += self.prot_length_dict[prot]
1969  resendx[prot] = res
1970  res += gap_between_components
1971 
1972  resoffsety = {}
1973  resendy = {}
1974  res = gap_between_components
1975  for prot in prot_listy:
1976  resoffsety[prot] = res
1977  res += self.prot_length_dict[prot]
1978  resendy[prot] = res
1979  res += gap_between_components
1980 
1981  resoffsetdiagonal = {}
1982  res = gap_between_components
1983  for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
1984  resoffsetdiagonal[prot] = res
1985  res += self.prot_length_dict[prot]
1986  res += gap_between_components
1987 
1988  # plot protein boundaries
1989 
1990  xticks = []
1991  xlabels = []
1992  for n, prot in enumerate(prot_listx):
1993  res = resoffsetx[prot]
1994  end = resendx[prot]
1995  for proty in prot_listy:
1996  resy = resoffsety[proty]
1997  endy = resendy[proty]
1998  ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
1999  ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
2000  xticks.append((float(res) + float(end)) / 2)
2001  if rename_protein_map is not None:
2002  if prot in rename_protein_map:
2003  prot=rename_protein_map[prot]
2004  xlabels.append(prot)
2005 
2006  yticks = []
2007  ylabels = []
2008  for n, prot in enumerate(prot_listy):
2009  res = resoffsety[prot]
2010  end = resendy[prot]
2011  for protx in prot_listx:
2012  resx = resoffsetx[protx]
2013  endx = resendx[protx]
2014  ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
2015  ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
2016  yticks.append((float(res) + float(end)) / 2)
2017  if rename_protein_map is not None:
2018  if prot in rename_protein_map:
2019  prot=rename_protein_map[prot]
2020  ylabels.append(prot)
2021 
2022  # plot the contact map
2023  print(prot_listx, prot_listy)
2024 
2025  if not self.contactmap is None:
2026  import matplotlib.cm as cm
2027  tmp_array = np.zeros((nresx, nresy))
2028 
2029  for px in prot_listx:
2030  print(px)
2031  for py in prot_listy:
2032  print(py)
2033  resx = resoffsety[px]
2034  lengx = resendx[px] - 1
2035  resy = resoffsety[py]
2036  lengy = resendy[py] - 1
2037  indexes_x = self.index_dictionary[px]
2038  minx = min(indexes_x)
2039  maxx = max(indexes_x)
2040  indexes_y = self.index_dictionary[py]
2041  miny = min(indexes_y)
2042  maxy = max(indexes_y)
2043 
2044  print(px, py, minx, maxx, miny, maxy)
2045 
2046  try:
2047  tmp_array[
2048  resx:lengx,
2049  resy:lengy] = self.contactmap[
2050  minx:maxx,
2051  miny:maxy]
2052  except:
2053  continue
2054 
2055 
2056  ax.imshow(tmp_array,
2057  cmap=cm.binary,
2058  origin='lower',
2059  interpolation='nearest')
2060 
2061  ax.set_xticks(xticks)
2062  ax.set_xticklabels(xlabels, rotation=90)
2063  ax.set_yticks(yticks)
2064  ax.set_yticklabels(ylabels)
2065  ax.set_xlim(0,nresx)
2066  ax.set_ylim(0,nresy)
2067 
2068 
2069  # set the crosslinks
2070 
2071  already_added_xls = []
2072 
2073  for xl in self.crosslinks:
2074 
2075  (r1, c1, r2, c2, mdist, stdv, confidence,
2076  unique_identifier, descriptor) = xl
2077 
2078  if confidence_classes is not None:
2079  if confidence not in confidence_classes:
2080  continue
2081 
2082  try:
2083  pos1 = r1 + resoffsetx[c1]
2084  except:
2085  continue
2086  try:
2087  pos2 = r2 + resoffsety[c2]
2088  except:
2089  continue
2090 
2091  if not filter is None:
2092  xldb = self.external_csv_data[unique_identifier]
2093  xldb.update({"Distance": mdist})
2094  xldb.update({"Distance_stdv": stdv})
2095 
2096  if filter[1] == ">":
2097  if float(xldb[filter[0]]) <= float(filter[2]):
2098  continue
2099 
2100  if filter[1] == "<":
2101  if float(xldb[filter[0]]) >= float(filter[2]):
2102  continue
2103 
2104  if filter[1] == "==":
2105  if float(xldb[filter[0]]) != float(filter[2]):
2106  continue
2107 
2108  # all that below is used for plotting the diagonal
2109  # when you have a rectangolar plots
2110 
2111  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
2112  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
2113 
2114  if layout == 'lowerdiagonal':
2115  if pos_for_diagonal1 <= pos_for_diagonal2:
2116  continue
2117  if layout == 'upperdiagonal':
2118  if pos_for_diagonal1 >= pos_for_diagonal2:
2119  continue
2120 
2121  already_added_xls.append((r1, c1, r2, c2))
2122 
2123  if not no_confidence_info:
2124  if confidence == '0.01':
2125  markersize = 14 * scale_symbol_size
2126  elif confidence == '0.05':
2127  markersize = 9 * scale_symbol_size
2128  elif confidence == '0.1':
2129  markersize = 6 * scale_symbol_size
2130  else:
2131  markersize = 15 * scale_symbol_size
2132  else:
2133  markersize = 5 * scale_symbol_size
2134 
2135  if not no_dist_info:
2136  color = self.colormap(mdist)
2137  else:
2138  color = "gray"
2139 
2140  ax.plot(
2141  [pos1],
2142  [pos2],
2143  'o',
2144  c=color,
2145  alpha=alphablend,
2146  markersize=markersize)
2147 
2148 
2149 
2150  fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
2151 
2152  [i.set_linewidth(2.0) for i in ax.spines.values()]
2153 
2154  #plt.tight_layout()
2155 
2156  if filename:
2157  plt.savefig(filename + ".pdf", dpi=300, transparent="False")
2158  else:
2159  plt.show()
2160 
2161  def get_frequency_statistics(self, prot_list,
2162  prot_list2=None):
2163 
2164  violated_histogram = {}
2165  satisfied_histogram = {}
2166  unique_cross_links = []
2167 
2168  for xl in self.unique_cross_link_list:
2169  (r1, c1, r2, c2, mdist) = xl
2170 
2171  # here we filter by the protein
2172  if prot_list2 is None:
2173  if not c1 in prot_list:
2174  continue
2175  if not c2 in prot_list:
2176  continue
2177  else:
2178  if c1 in prot_list and c2 in prot_list2:
2179  pass
2180  elif c1 in prot_list2 and c2 in prot_list:
2181  pass
2182  else:
2183  continue
2184 
2185  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2186 
2187  if (r1, c1, r2, c2) not in unique_cross_links:
2188  if mdist > 35.0:
2189  if frequency not in violated_histogram:
2190  violated_histogram[frequency] = 1
2191  else:
2192  violated_histogram[frequency] += 1
2193  else:
2194  if frequency not in satisfied_histogram:
2195  satisfied_histogram[frequency] = 1
2196  else:
2197  satisfied_histogram[frequency] += 1
2198  unique_cross_links.append((r1, c1, r2, c2))
2199  unique_cross_links.append((r2, c2, r1, c1))
2200 
2201  print("# satisfied")
2202 
2203  total_number_of_crosslinks=0
2204 
2205  for i in satisfied_histogram:
2206  # if i in violated_histogram:
2207  # print i, satisfied_histogram[i]+violated_histogram[i]
2208  # else:
2209  if i in violated_histogram:
2210  print(i, violated_histogram[i]+satisfied_histogram[i])
2211  else:
2212  print(i, satisfied_histogram[i])
2213  total_number_of_crosslinks+=i*satisfied_histogram[i]
2214 
2215  print("# violated")
2216 
2217  for i in violated_histogram:
2218  print(i, violated_histogram[i])
2219  total_number_of_crosslinks+=i*violated_histogram[i]
2220 
2221  print(total_number_of_crosslinks)
2222 
2223 
2224 # ------------
2225  def print_cross_link_binary_symbols(self, prot_list,
2226  prot_list2=None):
2227  tmp_matrix = []
2228  confidence_list = []
2229  for xl in self.crosslinks:
2230  (r1, c1, r2, c2, mdist, stdv, confidence,
2231  unique_identifier, descriptor) = xl
2232 
2233  if prot_list2 is None:
2234  if not c1 in prot_list:
2235  continue
2236  if not c2 in prot_list:
2237  continue
2238  else:
2239  if c1 in prot_list and c2 in prot_list2:
2240  pass
2241  elif c1 in prot_list2 and c2 in prot_list:
2242  pass
2243  else:
2244  continue
2245 
2246  if descriptor != "original":
2247  continue
2248 
2249  confidence_list.append(confidence)
2250 
2251  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2252  tmp_dist_binary = []
2253  for d in dists:
2254  if d < 35:
2255  tmp_dist_binary.append(1)
2256  else:
2257  tmp_dist_binary.append(0)
2258  tmp_matrix.append(tmp_dist_binary)
2259 
2260  matrix = list(zip(*tmp_matrix))
2261 
2262  satisfied_high_sum = 0
2263  satisfied_mid_sum = 0
2264  satisfied_low_sum = 0
2265  total_satisfied_sum = 0
2266  for k, m in enumerate(matrix):
2267  satisfied_high = 0
2268  total_high = 0
2269  satisfied_mid = 0
2270  total_mid = 0
2271  satisfied_low = 0
2272  total_low = 0
2273  total_satisfied = 0
2274  total = 0
2275  for n, b in enumerate(m):
2276  if confidence_list[n] == "0.01":
2277  total_high += 1
2278  if b == 1:
2279  satisfied_high += 1
2280  satisfied_high_sum += 1
2281  elif confidence_list[n] == "0.05":
2282  total_mid += 1
2283  if b == 1:
2284  satisfied_mid += 1
2285  satisfied_mid_sum += 1
2286  elif confidence_list[n] == "0.1":
2287  total_low += 1
2288  if b == 1:
2289  satisfied_low += 1
2290  satisfied_low_sum += 1
2291  if b == 1:
2292  total_satisfied += 1
2293  total_satisfied_sum += 1
2294  total += 1
2295  print(k, satisfied_high, total_high)
2296  print(k, satisfied_mid, total_mid)
2297  print(k, satisfied_low, total_low)
2298  print(k, total_satisfied, total)
2299  print(float(satisfied_high_sum) / len(matrix))
2300  print(float(satisfied_mid_sum) / len(matrix))
2301  print(float(satisfied_low_sum) / len(matrix))
2302 # ------------
2303 
2304  def get_unique_crosslinks_statistics(self, prot_list,
2305  prot_list2=None):
2306 
2307  print(prot_list)
2308  print(prot_list2)
2309  satisfied_high = 0
2310  total_high = 0
2311  satisfied_mid = 0
2312  total_mid = 0
2313  satisfied_low = 0
2314  total_low = 0
2315  total = 0
2316  tmp_matrix = []
2317  satisfied_string = []
2318  for xl in self.crosslinks:
2319  (r1, c1, r2, c2, mdist, stdv, confidence,
2320  unique_identifier, descriptor) = xl
2321 
2322  if prot_list2 is None:
2323  if not c1 in prot_list:
2324  continue
2325  if not c2 in prot_list:
2326  continue
2327  else:
2328  if c1 in prot_list and c2 in prot_list2:
2329  pass
2330  elif c1 in prot_list2 and c2 in prot_list:
2331  pass
2332  else:
2333  continue
2334 
2335  if descriptor != "original":
2336  continue
2337 
2338  total += 1
2339  if confidence == "0.01":
2340  total_high += 1
2341  if mdist <= 35:
2342  satisfied_high += 1
2343  if confidence == "0.05":
2344  total_mid += 1
2345  if mdist <= 35:
2346  satisfied_mid += 1
2347  if confidence == "0.1":
2348  total_low += 1
2349  if mdist <= 35:
2350  satisfied_low += 1
2351  if mdist <= 35:
2352  satisfied_string.append(1)
2353  else:
2354  satisfied_string.append(0)
2355 
2356  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2357  tmp_dist_binary = []
2358  for d in dists:
2359  if d < 35:
2360  tmp_dist_binary.append(1)
2361  else:
2362  tmp_dist_binary.append(0)
2363  tmp_matrix.append(tmp_dist_binary)
2364 
2365  print("unique satisfied_high/total_high", satisfied_high, "/", total_high)
2366  print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
2367  print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
2368  print("total", total)
2369 
2370  matrix = list(zip(*tmp_matrix))
2371  satisfied_models = 0
2372  satstr = ""
2373  for b in satisfied_string:
2374  if b == 0:
2375  satstr += "-"
2376  if b == 1:
2377  satstr += "*"
2378 
2379  for m in matrix:
2380  all_satisfied = True
2381  string = ""
2382  for n, b in enumerate(m):
2383  if b == 0:
2384  string += "0"
2385  if b == 1:
2386  string += "1"
2387  if b == 1 and satisfied_string[n] == 1:
2388  pass
2389  elif b == 1 and satisfied_string[n] == 0:
2390  pass
2391  elif b == 0 and satisfied_string[n] == 0:
2392  pass
2393  elif b == 0 and satisfied_string[n] == 1:
2394  all_satisfied = False
2395  if all_satisfied:
2396  satisfied_models += 1
2397  print(string)
2398  print(satstr, all_satisfied)
2399  print("models that satisfies the median satisfied crosslinks/total models", satisfied_models, len(matrix))
2400 
2401  def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
2402  prot_list2=None):
2403 
2404  import matplotlib as mpl
2405  mpl.use('Agg')
2406  import matplotlib.pylab as pl
2407 
2408  tmp_matrix = []
2409  for kw in self.cross_link_distances_unique:
2410  (r1, c1, r2, c2) = kw
2411  dists = self.cross_link_distances_unique[kw]
2412 
2413  if prot_list2 is None:
2414  if not c1 in prot_list:
2415  continue
2416  if not c2 in prot_list:
2417  continue
2418  else:
2419  if c1 in prot_list and c2 in prot_list2:
2420  pass
2421  elif c1 in prot_list2 and c2 in prot_list:
2422  pass
2423  else:
2424  continue
2425  # append the sum of dists to order by that in the matrix plot
2426  dists.append(sum(dists))
2427  tmp_matrix.append(dists)
2428 
2429  tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
2430 
2431  # print len(tmp_matrix), len(tmp_matrix[0])-1
2432  matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
2433 
2434  for i in range(len(tmp_matrix)):
2435  for k in range(len(tmp_matrix[i]) - 1):
2436  matrix[i][k] = tmp_matrix[i][k]
2437 
2438  print(matrix)
2439 
2440  fig = pl.figure()
2441  ax = fig.add_subplot(211)
2442 
2443  cax = ax.imshow(matrix, interpolation='nearest')
2444  # ax.set_yticks(range(len(self.model_list_names)))
2445  #ax.set_yticklabels( [self.model_list_names[i] for i in leaves_order] )
2446  fig.colorbar(cax)
2447  pl.savefig(figurename, dpi=300)
2448  pl.show()
2449 
2450  def plot_bars(
2451  self,
2452  filename,
2453  prots1,
2454  prots2,
2455  nxl_per_row=20,
2456  arrangement="inter",
2457  confidence_input="None"):
2458 
2459  data = []
2460  for xl in self.cross_link_distances:
2461  (r1, c1, r2, c2, mdist, confidence) = xl
2462  if c1 in prots1 and c2 in prots2:
2463  if arrangement == "inter" and c1 == c2:
2464  continue
2465  if arrangement == "intra" and c1 != c2:
2466  continue
2467  if confidence_input == confidence:
2468  label = str(c1) + ":" + str(r1) + \
2469  "-" + str(c2) + ":" + str(r2)
2470  values = self.cross_link_distances[xl]
2471  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2472  data.append((label, values, mdist, frequency))
2473 
2474  sort_by_dist = sorted(data, key=lambda tup: tup[2])
2475  sort_by_dist = list(zip(*sort_by_dist))
2476  values = sort_by_dist[1]
2477  positions = list(range(len(values)))
2478  labels = sort_by_dist[0]
2479  frequencies = list(map(float, sort_by_dist[3]))
2480  frequencies = [f * 10.0 for f in frequencies]
2481 
2482  nchunks = int(float(len(values)) / nxl_per_row)
2483  values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
2484  positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
2485  positions,
2486  nchunks)
2487  frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
2488  frequencies,
2489  nchunks)
2490  labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)
2491 
2492  for n, v in enumerate(values_chunks):
2493  p = positions_chunks[n]
2494  f = frequencies_chunks[n]
2495  l = labels_chunks[n]
2497  filename + "." + str(n), v, p, f,
2498  valuename="Distance (Ang)", positionname="Unique " + arrangement + " Crosslinks", xlabels=l)
2499 
2500  def crosslink_distance_histogram(self, filename,
2501  prot_list=None,
2502  prot_list2=None,
2503  confidence_classes=None,
2504  bins=40,
2505  color='#66CCCC',
2506  yplotrange=[0, 1],
2507  format="png",
2508  normalized=False):
2509  if prot_list is None:
2510  prot_list = list(self.prot_length_dict.keys())
2511 
2512  distances = []
2513  for xl in self.crosslinks:
2514  (r1, c1, r2, c2, mdist, stdv, confidence,
2515  unique_identifier, descriptor) = xl
2516 
2517  if not confidence_classes is None:
2518  if confidence not in confidence_classes:
2519  continue
2520 
2521  if prot_list2 is None:
2522  if not c1 in prot_list:
2523  continue
2524  if not c2 in prot_list:
2525  continue
2526  else:
2527  if c1 in prot_list and c2 in prot_list2:
2528  pass
2529  elif c1 in prot_list2 and c2 in prot_list:
2530  pass
2531  else:
2532  continue
2533 
2534  distances.append(mdist)
2535 
2537  filename, distances, valuename="C-alpha C-alpha distance [Ang]",
2538  bins=bins, color=color,
2539  format=format,
2540  reference_xline=35.0,
2541  yplotrange=yplotrange, normalized=normalized)
2542 
2543  def scatter_plot_xl_features(self, filename,
2544  feature1=None,
2545  feature2=None,
2546  prot_list=None,
2547  prot_list2=None,
2548  yplotrange=None,
2549  reference_ylines=None,
2550  distance_color=True,
2551  format="png"):
2552  import matplotlib as mpl
2553  mpl.use('Agg')
2554  import matplotlib.pyplot as plt
2555  import matplotlib.cm as cm
2556 
2557  fig = plt.figure(figsize=(10, 10))
2558  ax = fig.add_subplot(111)
2559 
2560  for xl in self.crosslinks:
2561  (r1, c1, r2, c2, mdist, stdv, confidence,
2562  unique_identifier, arrangement) = xl
2563 
2564  if prot_list2 is None:
2565  if not c1 in prot_list:
2566  continue
2567  if not c2 in prot_list:
2568  continue
2569  else:
2570  if c1 in prot_list and c2 in prot_list2:
2571  pass
2572  elif c1 in prot_list2 and c2 in prot_list:
2573  pass
2574  else:
2575  continue
2576 
2577  xldb = self.external_csv_data[unique_identifier]
2578  xldb.update({"Distance": mdist})
2579  xldb.update({"Distance_stdv": stdv})
2580 
2581  xvalue = float(xldb[feature1])
2582  yvalue = float(xldb[feature2])
2583 
2584  if distance_color:
2585  color = self.colormap(mdist)
2586  else:
2587  color = "gray"
2588 
2589  ax.plot([xvalue], [yvalue], 'o', c=color, alpha=0.1, markersize=7)
2590 
2591  if not yplotrange is None:
2592  ax.set_ylim(yplotrange)
2593  if not reference_ylines is None:
2594  for rl in reference_ylines:
2595  ax.axhline(rl, color='red', linestyle='dashed', linewidth=1)
2596 
2597  if filename:
2598  plt.savefig(filename + "." + format, dpi=150, transparent="False")
2599 
2600  plt.show()
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Definition: Molecule.h:35
Simple 3D transformation class.
Visualization of crosslinks.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
double get_drms(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
A class for reading stat files.
Definition: output.py:674
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def plot_field_histogram
Plot a list of histograms from a value list.
Definition: output.py:935
def plot_fields_box_plots
Plot time series as boxplots.
Definition: output.py:1000
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
Definition: atom/distance.h:49
Miscellaneous utilities.
Definition: tools.py:1
def __init__
Constructor.
double get_weighted_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const Floats &weights)
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
A class to evaluate the precision of an ensemble.
log
Definition: log.py:1
A class to cluster structures.
def get_rmsd_wrt_reference_structure_with_alignment
First align then calculate RMSD.
static Molecule setup_particle(Model *m, ParticleIndex pi)
Definition: Molecule.h:37
def __init__
Constructor.
Definition: pmi/Analysis.py:32
Class for sampling a density map from particles.
def add_structure
Read a structure into the ensemble and store (as coordinates).
DensityMap * multiply(const DensityMap *m1, const DensityMap *m2)
Return a density map for which voxel i contains the result of m1[i]*m2[i].
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:23
def add_subunits_density
Add a frame to the densities.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:626
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:1042
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
def fill
Add coordinates for a single model.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Definition: utilities.h:98
def do_cluster
Run K-means clustering.
3D rotation class.
Definition: Rotation3D.h:46
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:457
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def set_reference_structure
Read in a structure used for reference computation.
def get_rmsf
Calculate the residue mean square fluctuations (RMSF).
def get_average_distance_wrt_reference_structure
Compare the structure set to the reference structure.
def get_density
Get the current density for some component name.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
The general base class for IMP exceptions.
Definition: exception.h:49
def get_precision
Evaluate the precision of two named structure groups.
VectorD< 3 > Vector3D
Definition: VectorD.h:395
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def get_particles_at_resolution_ten
Get particles at res 10, or any beads, based on the name.
Python classes to represent, score, sample and analyze models.
def add_structures
Read a list of RMFs, supports parallel.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Hierarchies get_leaves(const Selection &h)
double get_drmsd_Q(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1, double threshold)
Definition: atom/distance.h:85
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:1000
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27