IMP Reference Guide, develop.cb6747d2d1, 2024/03/28
The Integrative Modeling Platform
pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 from __future__ import print_function
7 import IMP
8 import IMP.algebra
9 import IMP.em
10 import IMP.pmi
11 import IMP.pmi.tools
12 import IMP.pmi.output
13 import IMP.rmf
14 import RMF
15 import IMP.pmi.analysis
16 from operator import itemgetter
17 from math import log, sqrt
18 import itertools
19 import numpy as np
20 
21 
22 class Alignment(object):
23  """Performs alignment and RMSD calculation for two sets of coordinates
24 
25  The class also takes into account non-equal stoichiometry of the proteins.
26  In that case, the names of proteins present in multiple copies
27  should be specified in the following form:
28  nameA..1, nameA..2 (note two dots).
29  """
30 
31  def __init__(self, template, query, weights=None):
32  """Constructor.
33  @param query {'p1':coords(L,3), 'p2':coords(L,3)}
34  @param template {'p1':coords(L,3), 'p2':coords(L,3)}
35  @param weights optional weights for each set of coordinates
36  """
37  self.query = query
38  self.template = template
39  self.weights = weights
40 
41  if len(self.query.keys()) != len(self.template.keys()):
42  raise ValueError('''the number of proteins
43  in template and query does not match!''')
44 
45  def permute(self):
46  # get unique protein names for each protein
47  # this, unfortunately, expects that names are all 'Molname..X'
48  # where X is different for each copy.
49  self.proteins = sorted(self.query.keys())
50  prots_uniq = [i.split('..')[0] for i in self.proteins]
51 
52  # for each unique name, store list of permutations
53  # e.g. for keys A..1,A..2 store P[A] = [[A..1,A..2],[A..2,A..1]]
54  # then store the product: [[[A1,A2],[B1,B2]],[[A1,A2],[B2,B1]],
55  # [[A2,A1],[B1,B2]],[[A2,A1],[B2,B1]]]
56  P = {}
57  for p in prots_uniq:
58  copies = [i for i in self.proteins if i.split('..')[0] == p]
59  prmts = list(itertools.permutations(copies, len(copies)))
60  P[p] = prmts
61  self.P = P
62  self.Product = list(itertools.product(*P.values()))
63 
64  def get_rmsd(self):
65 
66  self.permute()
67 
68  template_xyz = []
69  weights = []
70  torder = sum([list(i) for i in self.Product[0]], [])
71  for t in torder:
72  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
73  if self.weights is not None:
74  weights += [i for i in self.weights[t]]
75 
76  self.rmsd = 10000000000.
77  for comb in self.Product:
78 
79  order = sum([list(i) for i in comb], [])
80  query_xyz = []
81  for p in order:
82  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
83 
84  if self.weights is not None:
85  dist = IMP.algebra.get_weighted_rmsd(
86  template_xyz, query_xyz, weights)
87  else:
88  dist = IMP.algebra.get_rmsd(template_xyz, query_xyz)
89  if dist < self.rmsd:
90  self.rmsd = dist
91  return self.rmsd
92 
93  def align(self):
94  from scipy.spatial.distance import cdist
95 
96  self.permute()
97 
98  # create flat coordinate list from template in standard order
99  # then loop through the permutations and try to align and get RMSD
100  # should allow you to try all mappings within each protein
101  template_xyz = []
102  torder = sum([list(i) for i in self.Product[0]], [])
103  for t in torder:
104  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
105  # template_xyz = np.array(template_xyz)
106  self.rmsd, Transformation = 10000000000., ''
107 
108  # then for each permutation, get flat list of coords and get RMSD
109  for comb in self.Product:
110  order = sum([list(i) for i in comb], [])
111  query_xyz = []
112  for p in order:
113  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
114 
115  if len(template_xyz) != len(query_xyz):
116  raise ValueError('''the number of coordinates
117  in template and query does not match!''')
118 
119  transformation = \
120  IMP.algebra.get_transformation_aligning_first_to_second(
121  query_xyz, template_xyz)
122  query_xyz_tr = [transformation.get_transformed(n)
123  for n in query_xyz]
124 
125  dist = sqrt(
126  sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2))
127  / len(template_xyz))
128  if dist < self.rmsd:
129  self.rmsd = dist
130  Transformation = transformation
131 
132  # return the transformation
133  return (self.rmsd, Transformation)
134 
135 
136 # TEST for the alignment ###
137 """
138 Proteins = {'a..1':np.array([np.array([-1.,1.])]),
139  'a..2':np.array([np.array([1.,1.,])]),
140  'a..3':np.array([np.array([-2.,1.])]),
141  'b':np.array([np.array([0.,-1.])]),
142  'c..1':np.array([np.array([-1.,-1.])]),
143  'c..2':np.array([np.array([1.,-1.])]),
144  'd':np.array([np.array([0.,0.])]),
145  'e':np.array([np.array([0.,1.])])}
146 
147 Ali = Alignment(Proteins, Proteins)
148 Ali.permute()
149 if Ali.get_rmsd() == 0.0: print('successful test!')
150 else: print('ERROR!'); exit()
151 """
152 
153 
154 # ----------------------------------
155 class Violations(object):
156 
157  def __init__(self, filename):
158 
159  self.violation_thresholds = {}
160  self.violation_counts = {}
161 
162  data = open(filename)
163  D = data.readlines()
164  data.close()
165 
166  for d in D:
167  d = d.strip().split()
168  self.violation_thresholds[d[0]] = float(d[1])
169 
170  def get_number_violated_restraints(self, rsts_dict):
171  num_violated = 0
172  for rst in self.violation_thresholds:
173  if rst not in rsts_dict:
174  continue # print rst;
175  if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
176  num_violated += 1
177  if rst not in self.violation_counts:
178  self.violation_counts[rst] = 1
179  else:
180  self.violation_counts[rst] += 1
181  return num_violated
182 
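# Usage sketch (editorial addition): Violations expects a whitespace-separated
# file with one "<restraint_name> <threshold>" pair per line, and a stat-file
# style dictionary mapping restraint names to their current values. The file
# name and values below are hypothetical.
"""
viol = Violations("violation_thresholds.txt")
rsts = {"DistanceRestraint_1": "12.3", "DistanceRestraint_2": "4.5"}
print(viol.get_number_violated_restraints(rsts))
print(viol.violation_counts)   # per-restraint violation counts accumulated so far
"""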
183 
184 class Clustering(object):
185  """A class to cluster structures.
186  Uses scipy's cdist function to compute distance matrices
187  and sklearn's kmeans clustering module.
188  """
189  def __init__(self, rmsd_weights=None):
190  """Constructor.
191  @param rmsd_weights Flat list of weights for each particle
192  (if they're coarse)
193  """
194  try:
195  from mpi4py import MPI
196  self.comm = MPI.COMM_WORLD
197  self.rank = self.comm.Get_rank()
198  self.number_of_processes = self.comm.size
199  except ImportError:
200  self.number_of_processes = 1
201  self.rank = 0
202  self.all_coords = {}
203  self.structure_cluster_ids = None
204  self.tmpl_coords = None
205  self.rmsd_weights = rmsd_weights
206 
207  def set_template(self, part_coords):
208  self.tmpl_coords = part_coords
209 
210  def fill(self, frame, Coords):
211  """Add coordinates for a single model."""
212  self.all_coords[frame] = Coords
213 
214  def dist_matrix(self):
215  self.model_list_names = list(self.all_coords.keys())
216  self.model_indexes = list(range(len(self.model_list_names)))
217  self.model_indexes_dict = dict(
218  list(zip(self.model_list_names, self.model_indexes)))
219  model_indexes_unique_pairs = \
220  list(itertools.combinations(self.model_indexes, 2))
221 
222  my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
223  model_indexes_unique_pairs,
224  self.number_of_processes)[self.rank]
225 
226  print("process %s assigned with %s pairs"
227  % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
228 
229  (raw_distance_dict, self.transformation_distance_dict) = \
230  self.matrix_calculation(self.all_coords, self.tmpl_coords,
231  my_model_indexes_unique_pairs)
232 
233  if self.number_of_processes > 1:
234  raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
235  raw_distance_dict)
236  pickable_transformations = \
237  self.get_pickable_transformation_distance_dict()
238  pickable_transformations = IMP.pmi.tools.scatter_and_gather(
239  pickable_transformations)
240  self.set_transformation_distance_dict_from_pickable(
241  pickable_transformations)
242 
243  self.raw_distance_matrix = np.zeros(
244  (len(self.model_list_names), len(self.model_list_names)))
245  for item in raw_distance_dict:
246  (f1, f2) = item
247  self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
248  self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
249 
250  def get_dist_matrix(self):
251  return self.raw_distance_matrix
252 
253  def do_cluster(self, number_of_clusters, seed=None):
254  """Run K-means clustering
255  @param number_of_clusters Number of means (clusters)
256  @param seed the random seed
257  """
258  from sklearn.cluster import KMeans
259  if seed is not None:
260  np.random.seed(seed)
261  try:
262  # check whether we have the right version of sklearn
263  kmeans = KMeans(n_clusters=number_of_clusters)
264  except TypeError:
265  # sklearn older than 0.12
266  kmeans = KMeans(k=number_of_clusters)
267  kmeans.fit_predict(self.raw_distance_matrix)
268 
269  self.structure_cluster_ids = kmeans.labels_
270 
271  def get_pickable_transformation_distance_dict(self):
272  pickable_transformations = {}
273  for label in self.transformation_distance_dict:
274  tr = self.transformation_distance_dict[label]
275  trans = tuple(tr.get_translation())
276  rot = tuple(tr.get_rotation().get_quaternion())
277  pickable_transformations[label] = (rot, trans)
278  return pickable_transformations
279 
280  def set_transformation_distance_dict_from_pickable(
281  self,
282  pickable_transformations):
283  self.transformation_distance_dict = {}
284  for label in pickable_transformations:
285  tr = pickable_transformations[label]
286  trans = IMP.algebra.Vector3D(tr[1])
287  rot = IMP.algebra.Rotation3D(tr[0])
288  self.transformation_distance_dict[
289  label] = IMP.algebra.Transformation3D(rot, trans)
290 
291  def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
292  import pickle
293  outf = open(file_name + ".data", 'wb')
294 
295  # to pickle the transformation dictionary
296  # you have to save the arrays corresponding to
297  # the transformations
298 
299  pickable_transformations = \
300  self.get_pickable_transformation_distance_dict()
301  pickle.dump(
302  (self.structure_cluster_ids,
303  self.model_list_names,
304  pickable_transformations),
305  outf)
306 
307  np.save(file_name + ".npy", self.raw_distance_matrix)
308 
309  def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
310  import pickle
311 
312  inputf = open(file_name + ".data", 'rb')
313  (self.structure_cluster_ids, self.model_list_names,
314  pickable_transformations) = pickle.load(inputf)
315  inputf.close()
316 
317  self.raw_distance_matrix = np.load(file_name + ".npy")
318 
319  self.set_transformation_distance_dict_from_pickable(
320  pickable_transformations)
321  self.model_indexes = list(range(len(self.model_list_names)))
322  self.model_indexes_dict = dict(
323  list(zip(self.model_list_names, self.model_indexes)))
324 
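# Sketch (editorial addition): persisting and reloading the distance matrix so
# the expensive all-vs-all RMSD computation can be reused in a later session.
# save_distance_matrix_file() writes <file_name>.data (pickle) and
# <file_name>.npy (numpy array); the file name below is hypothetical.
"""
clus.save_distance_matrix_file(file_name="cluster.rawmatrix.pkl")
# ... later, in a new session:
clus2 = Clustering()
clus2.load_distance_matrix_file(file_name="cluster.rawmatrix.pkl")
clus2.do_cluster(number_of_clusters=3)
clus2.plot_matrix(figurename="clustermatrix.pdf")
"""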
325  def plot_matrix(self, figurename="clustermatrix.pdf"):
326  import matplotlib as mpl
327  mpl.use('Agg')
328  import matplotlib.pylab as pl
329  from scipy.cluster import hierarchy as hrc
330 
331  fig = pl.figure(figsize=(10, 8))
332  ax = fig.add_subplot(212)
333  dendrogram = hrc.dendrogram(
334  hrc.linkage(self.raw_distance_matrix),
335  color_threshold=7,
336  no_labels=True)
337  leaves_order = dendrogram['leaves']
338  ax.set_xlabel('Model')
339  ax.set_ylabel('RMSD [Angstroms]')
340 
341  ax2 = fig.add_subplot(221)
342  cax = ax2.imshow(
343  self.raw_distance_matrix[leaves_order,
344  :][:,
345  leaves_order],
346  interpolation='nearest')
347  cb = fig.colorbar(cax)
348  cb.set_label('RMSD [Angstroms]')
349  ax2.set_xlabel('Model')
350  ax2.set_ylabel('Model')
351 
352  pl.savefig(figurename, dpi=300)
353  pl.close(fig)
354 
355  def get_model_index_from_name(self, name):
356  return self.model_indexes_dict[name]
357 
358  def get_cluster_labels(self):
359  # return the list of unique cluster labels
360  return list(set(self.structure_cluster_ids))
361 
362  def get_number_of_clusters(self):
363  return len(self.get_cluster_labels())
364 
365  def get_cluster_label_indexes(self, label):
366  return (
367  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
368  )
369 
370  def get_cluster_label_names(self, label):
371  return (
372  [self.model_list_names[i]
373  for i in self.get_cluster_label_indexes(label)]
374  )
375 
376  def get_cluster_label_average_rmsd(self, label):
377 
378  indexes = self.get_cluster_label_indexes(label)
379 
380  if len(indexes) > 1:
381  sub_distance_matrix = self.raw_distance_matrix[
382  indexes, :][:, indexes]
383  average_rmsd = np.sum(sub_distance_matrix) / \
384  (len(sub_distance_matrix)
385  ** 2 - len(sub_distance_matrix))
386  else:
387  average_rmsd = 0.0
388  return average_rmsd
389 
390  def get_cluster_label_size(self, label):
391  return len(self.get_cluster_label_indexes(label))
392 
393  def get_transformation_to_first_member(
394  self,
395  cluster_label,
396  structure_index):
397  reference = self.get_cluster_label_indexes(cluster_label)[0]
398  return self.transformation_distance_dict[(reference, structure_index)]
399 
400  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
401 
402  model_list_names = list(all_coords.keys())
403  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
404  raw_distance_dict = {}
405  transformation_distance_dict = {}
406  if template_coords is None:
407  do_alignment = False
408  else:
409  do_alignment = True
410  alignment_template_protein_names = list(template_coords.keys())
411 
412  for (f1, f2) in list_of_pairs:
413 
414  if not do_alignment:
415  # here we only get the rmsd,
416  # we need that for instance when you want to cluster
417  # conformations globally, e.g. the EM map is the reference
418  transformation = IMP.algebra.get_identity_transformation_3d()
419 
420  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
421  for pr in rmsd_protein_names])
422  coords_f2 = {}
423  for pr in rmsd_protein_names:
424  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
425 
426  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
427  rmsd = Ali.get_rmsd()
428 
429  elif do_alignment:
430  # here we actually align the conformations first
431  # and than calculate the rmsd. We need that when the
432  # protein(s) is the reference
433  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
434  for pr in alignment_template_protein_names])
435  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
436  for pr in alignment_template_protein_names])
437 
438  Ali = Alignment(coords_f1, coords_f2)
439  template_rmsd, transformation = Ali.align()
440 
441  # here we calculate the rmsd
442  # we will align two models based on the number of subunits
443  # provided and transform coordinates of model 2 to model 1
444  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
445  for pr in rmsd_protein_names])
446  coords_f2 = {}
447  for pr in rmsd_protein_names:
448  coords_f2[pr] = [transformation.get_transformed(
449  i) for i in all_coords[model_list_names[f2]][pr]]
450 
451  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
452  rmsd = Ali.get_rmsd()
453 
454  raw_distance_dict[(f1, f2)] = rmsd
455  raw_distance_dict[(f2, f1)] = rmsd
456  transformation_distance_dict[(f1, f2)] = transformation
457  transformation_distance_dict[(f2, f1)] = transformation
458 
459  return raw_distance_dict, transformation_distance_dict
460 
461 
462 class RMSD(object):
463  """Compute the RMSD (without alignment) taking into account the copy
464  ambiguity.
465  To be used with pmi2 hierarchies. Can be used for instance as follows:
466 
467  rmsd = IMP.pmi.analysis.RMSD(
468  hier, hier, [mol.get_name() for mol in mols],
469  dynamic0=True, dynamic1=False)
470  output_objects.append(rmsd)
471 
472  before shuffling the coordinates
473  """
474 
475  def __init__(self, hier0, hier1, molnames, label="None", dynamic0=True,
476  dynamic1=True, metric=IMP.algebra.get_rmsd):
477  """
478  @param hier0 first input hierarchy
479  @param hier1 second input hierarchy
480  @param molnames the names of the molecules used for the RMSD
481  @param dynamic0 if True, store the XYZ decorators so that the
482  coordinates of hier0 can be updated. If False, coordinates are static
483  (stored as Vector3Ds) and will never be updated
484  @param dynamic1 same as above, for hier1
485  @param metric the distance metric to use (default IMP.algebra.get_rmsd)
486  """
487  self.moldict0, self.molcoords0, self.mol_XYZs0 = \
488  self.get_moldict_coorddict(hier0, molnames)
489  self.moldict1, self.molcoords1, self.mol_XYZs1 = \
490  self.get_moldict_coorddict(hier1, molnames)
491  self.dynamic0 = dynamic0
492  self.dynamic1 = dynamic1
493  self.metric = metric
494  self.label = label
495 
496  def get_moldict_coorddict(self, hier, molnames):
497  """return data structure for the RMSD calculation"""
498  moldict = {}
499  mol_coords = {}
500  mol_XYZs = {}
501  for mol in IMP.pmi.tools.get_molecules(hier):
502  name = mol.get_name()
503  if name not in molnames:
504  continue
505  parts = True
506  mol_coords[mol] = []
507  mol_XYZs[mol] = []
508  i = 1
509  while parts:
510  sel = IMP.atom.Selection(
511  mol, residue_index=i,
512  representation_type=IMP.atom.BALLS,
513  resolution=1)
514  parts = sel.get_selected_particles()
515  if parts:
516  mol_coords[mol].append(
517  IMP.core.XYZ(parts[0]).get_coordinates())
518  mol_XYZs[mol].append(IMP.core.XYZ(parts[0]))
519  i = i+1
520  if name in moldict:
521  moldict[name].append(mol)
522  else:
523  moldict[name] = [mol]
524  return moldict, mol_coords, mol_XYZs
525 
526  def get_rmsd_and_assigments(self):
527  total_rmsd = 0
528  total_N = 0
529  best_assignments = []
530  rmsd_dict = {}
531  for molname, ref_mols in self.moldict1.items():
532  ref_coords = []
533  for ref_mol in ref_mols:
534  if self.dynamic1:
535  coord1 = [XYZ.get_coordinates()
536  for XYZ in self.mol_XYZs1[ref_mol]]
537  else:
538  coord1 = self.molcoords1[ref_mol]
539  ref_coords += coord1
540 
541  rmsd = []
542  rmf_mols_list = []
543  for rmf_mols in itertools.permutations(self.moldict0[molname]):
544  rmf_coords = []
545  for rmf_mol in rmf_mols:
546  if self.dynamic0:
547  coord0 = [XYZ.get_coordinates()
548  for XYZ in self.mol_XYZs0[rmf_mol]]
549  else:
550  coord0 = self.molcoords0[rmf_mol]
551  rmf_coords += coord0
552 
553  rmsd.append(IMP.algebra.get_rmsd(ref_coords, rmf_coords))
554  rmf_mols_list.append(rmf_mols)
555  m = min(rmsd)
556  rmf_mols_best_order = rmf_mols_list[rmsd.index(m)]
557 
558  for n, (ref_mol, rmf_mol) in enumerate(
559  zip(ref_mols, rmf_mols_best_order)):
560  best_assignments.append((rmf_mol, ref_mol))
561  if self.dynamic0:
562  coord0 = [XYZ.get_coordinates()
563  for XYZ in self.mol_XYZs0[rmf_mol]]
564  else:
565  coord0 = self.molcoords0[rmf_mol]
566 
567  if self.dynamic1:
568  coord1 = [XYZ.get_coordinates()
569  for XYZ in self.mol_XYZs1[ref_mol]]
570  else:
571  coord1 = self.molcoords1[ref_mol]
572  rmsd_pair = self.metric(coord1, coord0)
573  N = len(self.molcoords1[ref_mol])
574  total_N += N
575  total_rmsd += rmsd_pair*rmsd_pair*N
576  rmsd_dict[ref_mol] = rmsd_pair
577  total_rmsd = sqrt(total_rmsd/total_N)
578  return total_rmsd, best_assignments
579 
580  def get_output(self):
581  """Returns output for IMP.pmi.output.Output object"""
582  total_rmsd, best_assignments = self.get_rmsd_and_assigments()
583 
584  assignments_out = []
585  for rmf_mol, ref_mol in best_assignments:
586  ref_name = ref_mol.get_name()
587  ref_copy = IMP.atom.Copy(ref_mol).get_copy_index()
588  rmf_name = rmf_mol.get_name()
589  rmf_copy = IMP.atom.Copy(rmf_mol).get_copy_index()
590  assignments_out.append(rmf_name + "." + str(rmf_copy) + "->" +
591  ref_name + "." + str(ref_copy))
592  return {"RMSD_" + self.label: str(total_rmsd),
593  "RMSD_assignments_" + self.label: str(assignments_out)}
594 
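# Sketch (editorial addition): the class docstring above shows how an RMSD
# object is appended to output_objects; get_output() then contributes two
# stat-file fields per call, roughly of the form shown below. The label,
# hierarchies, molecule names and printed values are hypothetical.
"""
rmsd = RMSD(hier, reference_hier, molnames,
            label="to_ref", dynamic0=True, dynamic1=False)
print(rmsd.get_output())
# e.g. {'RMSD_to_ref': '12.7',
#       'RMSD_assignments_to_ref': "['ProtA.0->ProtA.1', 'ProtA.1->ProtA.0']"}
"""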
595 
596 class Precision(object):
597  """A class to evaluate the precision of an ensemble.
598 
599  Also can evaluate the cross-precision of multiple ensembles.
600  Supports MPI for coordinate reading.
601  Recommended procedure:
602  -# initialize object and pass the selection for evaluating precision
603  -# call add_structures() to read in the data (specify group name)
604  -# call get_precision() to evaluate inter/intra precision
605  -# call get_rmsf() to evaluate within-group fluctuations
606  """
607  def __init__(self, model, resolution=1, selection_dictionary={}):
608  """Constructor.
609  @param model The IMP Model
610  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
611  part of the hier name)
612  @param selection_dictionary Dictionary where keys are names for
613  selections and values are selection tuples for scoring
614  precision. "All" is automatically made as well
615  """
616  try:
617  from mpi4py import MPI
618  self.comm = MPI.COMM_WORLD
619  self.rank = self.comm.Get_rank()
620  self.number_of_processes = self.comm.size
621  except ImportError:
622  self.number_of_processes = 1
623  self.rank = 0
624 
625  self.styles = ['pairwise_rmsd', 'pairwise_drmsd_k', 'pairwise_drmsd_Q',
626  'pairwise_drms_k', 'pairwise_rmsd', 'drmsd_from_center']
627  self.style = 'pairwise_drmsd_k'
628  self.structures_dictionary = {}
629  self.reference_structures_dictionary = {}
630  self.prots = []
631  self.protein_names = None
632  self.len_particles_resolution_one = None
633  self.model = model
634  self.rmf_names_frames = {}
635  self.reference_rmf_names_frames = None
636  self.reference_structure = None
637  self.reference_prot = None
638  self.selection_dictionary = selection_dictionary
639  self.threshold = 40.0
640  self.residue_particle_index_map = None
641  self.prots = None
642  if resolution in [1, 10]:
643  self.resolution = resolution
644  else:
645  raise KeyError("Currently only allow resolution 1 or 10")
646 
647  def _get_structure(self, rmf_frame_index, rmf_name):
648  """Read an RMF file and return the particles"""
649  rh = RMF.open_rmf_file_read_only(rmf_name)
650  if not self.prots:
651  print("getting coordinates for frame %i rmf file %s"
652  % (rmf_frame_index, rmf_name))
653  self.prots = IMP.rmf.create_hierarchies(rh, self.model)
654  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
655  else:
656  print("linking coordinates for frame %i rmf file %s"
657  % (rmf_frame_index, rmf_name))
658  IMP.rmf.link_hierarchies(rh, self.prots)
659  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
660  del rh
661 
662  if self.resolution == 1:
663  particle_dict = get_particles_at_resolution_one(self.prots[0])
664  elif self.resolution == 10:
665  particle_dict = get_particles_at_resolution_ten(self.prots[0])
666 
667  protein_names = list(particle_dict.keys())
668  particles_resolution_one = []
669  for k in particle_dict:
670  particles_resolution_one += (particle_dict[k])
671 
672  if self.protein_names is None:
673  self.protein_names = protein_names
674  else:
675  if self.protein_names != protein_names:
676  print("Error: the protein names of the new coordinate set "
677  "is not compatible with the previous one")
678 
679  if self.len_particles_resolution_one is None:
680  self.len_particles_resolution_one = len(particles_resolution_one)
681  else:
682  if self.len_particles_resolution_one != \
683  len(particles_resolution_one):
684  raise ValueError("the new coordinate set is not compatible "
685  "with the previous one")
686 
687  return particles_resolution_one
688 
689  def add_structure(self,
690  rmf_name,
691  rmf_frame_index,
692  structure_set_name,
693  setup_index_map=False):
694  """ Read a structure into the ensemble and store (as coordinates).
695  @param rmf_name The name of the RMF file
696  @param rmf_frame_index The frame to read
697  @param structure_set_name Name for the set that includes this structure
698  (e.g. "cluster 1")
699  @param setup_index_map if requested, set up a dictionary to help
700  find residue indexes
701  """
702 
703  # decide where to put this structure
704  if structure_set_name in self.structures_dictionary:
705  cdict = self.structures_dictionary[structure_set_name]
706  rmflist = self.rmf_names_frames[structure_set_name]
707  else:
708  self.structures_dictionary[structure_set_name] = {}
709  self.rmf_names_frames[structure_set_name] = []
710  cdict = self.structures_dictionary[structure_set_name]
711  rmflist = self.rmf_names_frames[structure_set_name]
712 
713  # read the particles
714  try:
715  particles_resolution_one = self._get_structure(
716  rmf_frame_index, rmf_name)
717  except ValueError:
718  print("something wrong with the rmf")
719  return 0
720  self.selection_dictionary.update({"All": self.protein_names})
721 
722  for selection_name in self.selection_dictionary:
723  selection_tuple = self.selection_dictionary[selection_name]
724  coords = self._select_coordinates(
725  selection_tuple, particles_resolution_one, self.prots[0])
726  if selection_name not in cdict:
727  cdict[selection_name] = [coords]
728  else:
729  cdict[selection_name].append(coords)
730 
731  rmflist.append((rmf_name, rmf_frame_index))
732 
733  # if requested, set up a dictionary to help find residue indexes
734  if setup_index_map:
735  self.residue_particle_index_map = {}
736  for prot_name in self.protein_names:
737  self.residue_particle_index_map[prot_name] = \
738  self._get_residue_particle_index_map(
739  prot_name, particles_resolution_one, self.prots[0])
740 
741  def add_structures(self,
742  rmf_name_frame_tuples,
743  structure_set_name):
744  """Read a list of RMFs, supports parallel
745  @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
746  @param structure_set_name Name this set of structures
747  (e.g. "cluster.1")
748  """
749 
750  # split up the requested list to read in parallel
751  my_rmf_name_frame_tuples = IMP.pmi.tools.chunk_list_into_segments(
752  rmf_name_frame_tuples, self.number_of_processes)[self.rank]
753  for nfr, tup in enumerate(my_rmf_name_frame_tuples):
754  rmf_name = tup[0]
755  rmf_frame_index = tup[1]
756  # the first frame stores the map between residues and particles
757  if self.residue_particle_index_map is None:
758  setup_index_map = True
759  else:
760  setup_index_map = False
761  self.add_structure(rmf_name,
762  rmf_frame_index,
763  structure_set_name,
764  setup_index_map)
765 
766  # synchronize the structures
767  if self.number_of_processes > 1:
768  self.rmf_names_frames = IMP.pmi.tools.scatter_and_gather(
769  self.rmf_names_frames)
770  if self.rank != 0:
771  self.comm.send(self.structures_dictionary, dest=0, tag=11)
772  elif self.rank == 0:
773  for i in range(1, self.number_of_processes):
774  data_tmp = self.comm.recv(source=i, tag=11)
775  for key in self.structures_dictionary:
776  self.structures_dictionary[key].update(data_tmp[key])
777  for i in range(1, self.number_of_processes):
778  self.comm.send(self.structures_dictionary, dest=i, tag=11)
779  if self.rank != 0:
780  self.structures_dictionary = self.comm.recv(source=0, tag=11)
781 
782  def _get_residue_particle_index_map(self, prot_name, structure, hier):
783  # Creates map from all particles to residue numbers
784  residue_particle_index_map = []
785  s = IMP.atom.Selection(hier, molecules=[prot_name], resolution=1)
786  all_selected_particles = s.get_selected_particles()
787  intersection = list(set(all_selected_particles) & set(structure))
788  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
789  for p in sorted_intersection:
790  residue_particle_index_map.append(
791  IMP.pmi.tools.get_residue_indexes(p))
792  return residue_particle_index_map
793 
794  def _select_coordinates(self, tuple_selections, structure, prot):
795  selected_coordinates = []
796  for t in tuple_selections:
797  if isinstance(t, tuple) and len(t) == 3:
798  s = IMP.atom.Selection(
799  prot, molecules=[t[2]],
800  residue_indexes=range(t[0], t[1]+1), resolution=1)
801  all_selected_particles = s.get_selected_particles()
802  intersection = list(set(all_selected_particles)
803  & set(structure))
804  sorted_intersection = \
805  IMP.pmi.tools.sort_by_residues(intersection)
806  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
807  for p in sorted_intersection]
808  selected_coordinates += cc
809  elif isinstance(t, str):
810  s = IMP.atom.Selection(prot, molecules=[t], resolution=1)
811  all_selected_particles = s.get_selected_particles()
812  intersection = list(set(all_selected_particles)
813  & set(structure))
814  sorted_intersection = \
815  IMP.pmi.tools.sort_by_residues(intersection)
816  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
817  for p in sorted_intersection]
818  selected_coordinates += cc
819  else:
820  raise ValueError("Selection error")
821  return selected_coordinates
822 
823  def set_threshold(self, threshold):
824  self.threshold = threshold
825 
826  def _get_distance(self, structure_set_name1, structure_set_name2,
827  selection_name, index1, index2):
828  """Compute distance between structures with various metrics"""
829  c1 = self.structures_dictionary[
830  structure_set_name1][selection_name][index1]
831  c2 = self.structures_dictionary[
832  structure_set_name2][selection_name][index2]
833  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
834  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
835 
836  if self.style == 'pairwise_drmsd_k':
837  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
838  if self.style == 'pairwise_drms_k':
839  distance = IMP.atom.get_drms(coordinates1, coordinates2)
840  if self.style == 'pairwise_drmsd_Q':
841  distance = IMP.atom.get_drmsd_Q(coordinates1, coordinates2,
842  self.threshold)
843 
844  if self.style == 'pairwise_rmsd':
845  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
846  return distance
847 
848  def _get_particle_distances(self, structure_set_name1, structure_set_name2,
849  selection_name, index1, index2):
850  c1 = self.structures_dictionary[
851  structure_set_name1][selection_name][index1]
852  c2 = self.structures_dictionary[
853  structure_set_name2][selection_name][index2]
854 
855  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
856  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
857 
858  distances = [np.linalg.norm(a-b)
859  for (a, b) in zip(coordinates1, coordinates2)]
860 
861  return distances
862 
863  def get_precision(self, structure_set_name1, structure_set_name2,
864  outfile=None, skip=1, selection_keywords=None):
865  """ Evaluate the precision of two named structure groups. Supports MPI.
866  When the structure_set_name1 is different from the structure_set_name2,
867  this evaluates the cross-precision (average pairwise distances).
868  @param outfile Name of the precision output file
869  @param structure_set_name1 string name of the first structure set
870  @param structure_set_name2 string name of the second structure set
871  @param skip analyze only every (skip)th structure for the distance
872  matrix calculation
873  @param selection_keywords Specify the selection name you want to
874  calculate on. By default this is computed for everything
875  you provided in the constructor, plus all the subunits together.
876  """
877  if selection_keywords is None:
878  sel_keys = list(self.selection_dictionary.keys())
879  else:
880  for k in selection_keywords:
881  if k not in self.selection_dictionary:
882  raise KeyError(
883  "you are trying to find named selection "
884  + k + " which was not requested in the constructor")
885  sel_keys = selection_keywords
886 
887  if outfile is not None:
888  of = open(outfile, "w")
889  centroid_index = 0
890  for selection_name in sel_keys:
891  number_of_structures_1 = len(self.structures_dictionary[
892  structure_set_name1][selection_name])
893  number_of_structures_2 = len(self.structures_dictionary[
894  structure_set_name2][selection_name])
895  distances = {}
896  structure_pointers_1 = list(range(0, number_of_structures_1, skip))
897  structure_pointers_2 = list(range(0, number_of_structures_2, skip))
898  pair_combination_list = list(
899  itertools.product(structure_pointers_1, structure_pointers_2))
900  if len(pair_combination_list) == 0:
901  raise ValueError("no structure selected. Check the "
902  "skip parameter.")
903 
904  # compute pairwise distances in parallel
905  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
906  pair_combination_list, self.number_of_processes)[self.rank]
907  for n, pair in enumerate(my_pair_combination_list):
908  distances[pair] = self._get_distance(
909  structure_set_name1, structure_set_name2, selection_name,
910  pair[0], pair[1])
911  if self.number_of_processes > 1:
912  distances = IMP.pmi.tools.scatter_and_gather(distances)
913 
914  # Finally compute distance to centroid
915  if self.rank == 0:
916  if structure_set_name1 == structure_set_name2:
917  structure_pointers = structure_pointers_1
918  number_of_structures = number_of_structures_1
919 
920  # calculate the distance from the first centroid
921  # and determine the centroid
922  distance = 0.0
923  distances_to_structure = {}
924  distances_to_structure_normalization = {}
925 
926  for n in structure_pointers:
927  distances_to_structure[n] = 0.0
928  distances_to_structure_normalization[n] = 0
929 
930  for k in distances:
931  distance += distances[k]
932  distances_to_structure[k[0]] += distances[k]
933  distances_to_structure[k[1]] += distances[k]
934  distances_to_structure_normalization[k[0]] += 1
935  distances_to_structure_normalization[k[1]] += 1
936 
937  for n in structure_pointers:
938  distances_to_structure[n] = \
939  distances_to_structure[n] \
940  / distances_to_structure_normalization[n]
941 
942  min_distance = min([distances_to_structure[n]
943  for n in distances_to_structure])
944  centroid_index = [k for k, v in
945  distances_to_structure.items()
946  if v == min_distance][0]
947  centroid_rmf_name = \
948  self.rmf_names_frames[
949  structure_set_name1][centroid_index]
950 
951  centroid_distance = 0.0
952  distance_list = []
953  for n in range(number_of_structures):
954  dist = self._get_distance(
955  structure_set_name1, structure_set_name1,
956  selection_name, centroid_index, n)
957  centroid_distance += dist
958  distance_list.append(dist)
959 
960  centroid_distance /= number_of_structures
961  if outfile is not None:
962  of.write(str(selection_name) + " " +
963  structure_set_name1 +
964  " average centroid distance " +
965  str(centroid_distance) + "\n")
966  of.write(str(selection_name) + " " +
967  structure_set_name1 + " centroid index " +
968  str(centroid_index) + "\n")
969  of.write(str(selection_name) + " " +
970  structure_set_name1 + " centroid rmf name " +
971  str(centroid_rmf_name) + "\n")
972  of.write(str(selection_name) + " " +
973  structure_set_name1 +
974  " median centroid distance " +
975  str(np.median(distance_list)) + "\n")
976 
977  average_pairwise_distances = sum(
978  distances.values())/len(list(distances.values()))
979  if outfile is not None:
980  of.write(str(selection_name) + " " + structure_set_name1 +
981  " " + structure_set_name2 +
982  " average pairwise distance " +
983  str(average_pairwise_distances) + "\n")
984  if outfile is not None:
985  of.close()
986  return centroid_index
987 
988  def get_rmsf(self, structure_set_name, outdir="./", skip=1,
989  set_plot_yaxis_range=None):
990  """ Calculate the residue mean square fluctuations (RMSF).
991  Automatically writes a data file and a PDF plot for each protein
992  @param structure_set_name Which structure set to calculate RMSF for
993  @param outdir Where to write the files
994  @param skip Skip this number of structures
995  @param set_plot_yaxis_range In case you need to set the y-axis range of the plot
996  """
997  # get the centroid structure for the whole complex
998  centroid_index = self.get_precision(structure_set_name,
999  structure_set_name,
1000  outfile=None,
1001  skip=skip)
1002  if self.rank == 0:
1003  for sel_name in self.protein_names:
1004  self.selection_dictionary.update({sel_name: [sel_name]})
1005  try:
1006  number_of_structures = \
1007  len(self.structures_dictionary[
1008  structure_set_name][sel_name])
1009  except KeyError:
1010  # that protein was not included in the selection
1011  continue
1012  rpim = self.residue_particle_index_map[sel_name]
1013  outfile = outdir+"/rmsf."+sel_name+".dat"
1014  of = open(outfile, "w")
1015  residue_distances = {}
1016  residue_nblock = {}
1017  for index in range(number_of_structures):
1018  distances = self._get_particle_distances(
1019  structure_set_name, structure_set_name, sel_name,
1020  centroid_index, index)
1021  for nblock, block in enumerate(rpim):
1022  for residue_number in block:
1023  residue_nblock[residue_number] = nblock
1024  if residue_number not in residue_distances:
1025  residue_distances[residue_number] = \
1026  [distances[nblock]]
1027  else:
1028  residue_distances[
1029  residue_number].append(distances[nblock])
1030 
1031  residues = []
1032  rmsfs = []
1033  for rn in residue_distances:
1034  residues.append(rn)
1035  rmsf = np.std(residue_distances[rn])
1036  rmsfs.append(rmsf)
1037  of.write(str(rn) + " " + str(residue_nblock[rn])
1038  + " " + str(rmsf) + "\n")
1039 
1040  IMP.pmi.output.plot_xy_data(
1041  residues, rmsfs, title=sel_name,
1042  out_fn=outdir+"/rmsf."+sel_name, display=False,
1043  set_plot_yaxis_range=set_plot_yaxis_range,
1044  xlabel='Residue Number', ylabel='Standard error')
1045  of.close()
1046 
1047  def set_reference_structure(self, rmf_name, rmf_frame_index):
1048  """Read in a structure used for reference computation.
1049  Needed before calling get_average_distance_wrt_reference_structure()
1050  @param rmf_name The RMF file to read the reference
1051  @param rmf_frame_index The index in that file
1052  """
1053  particles_resolution_one = self._get_structure(rmf_frame_index,
1054  rmf_name)
1055  self.reference_rmf_names_frames = (rmf_name, rmf_frame_index)
1056 
1057  for selection_name in self.selection_dictionary:
1058  selection_tuple = self.selection_dictionary[selection_name]
1059  coords = self._select_coordinates(
1060  selection_tuple, particles_resolution_one, self.prots[0])
1061  self.reference_structures_dictionary[selection_name] = coords
1062 
1063  def get_average_distance_wrt_reference_structure_with_alignment(
1064  self, structure_set_name, alignment_selection_key):
1065  """First align then calculate RMSD
1066  @param structure_set_name: the name of the structure set
1067  @param alignment_selection_key: the key containing the selection tuples
1068  needed to make the alignment stored in self.selection_dictionary
1069  @return: for each structure in the structure set, returns the rmsd
1070  """
1071  ret = {}
1072  if self.reference_structures_dictionary == {}:
1073  print("Cannot compute until you set a reference structure")
1074  return
1075 
1076  align_reference_coordinates = \
1077  self.reference_structures_dictionary[alignment_selection_key]
1078  align_coordinates = self.structures_dictionary[
1079  structure_set_name][alignment_selection_key]
1080  transformations = []
1081  for c in align_coordinates:
1083  {"All": align_reference_coordinates}, {"All": c})
1084  transformation = Ali.align()[1]
1085  transformations.append(transformation)
1086  for selection_name in self.selection_dictionary:
1087  reference_coordinates = \
1088  self.reference_structures_dictionary[selection_name]
1089  coordinates2 = [IMP.algebra.Vector3D(c)
1090  for c in reference_coordinates]
1091  distances = []
1092  for n, sc in enumerate(self.structures_dictionary[
1093  structure_set_name][selection_name]):
1094  coordinates1 = [
1095  transformations[n].get_transformed(IMP.algebra.Vector3D(c))
1096  for c in sc]
1097  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1098  distances.append(distance)
1099  ret[selection_name] = {
1100  'all_distances': distances,
1101  'average_distance': sum(distances)/len(distances),
1102  'minimum_distance': min(distances)}
1103  return ret
1104 
1105  def _get_median(self, list_of_values):
1106  return np.median(np.array(list_of_values))
1107 
1108  def get_average_distance_wrt_reference_structure(self, structure_set_name):
1109  """Compare the structure set to the reference structure.
1110  @param structure_set_name The structure set to compute this on
1111  @note First call set_reference_structure()
1112  """
1113  ret = {}
1114  if self.reference_structures_dictionary == {}:
1115  print("Cannot compute until you set a reference structure")
1116  return
1117  for selection_name in self.selection_dictionary:
1118  reference_coordinates = self.reference_structures_dictionary[
1119  selection_name]
1120  coordinates2 = [IMP.algebra.Vector3D(c)
1121  for c in reference_coordinates]
1122  distances = []
1123  for sc in self.structures_dictionary[
1124  structure_set_name][selection_name]:
1125  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
1126  if self.style == 'pairwise_drmsd_k':
1127  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
1128  if self.style == 'pairwise_drms_k':
1129  distance = IMP.atom.get_drms(coordinates1, coordinates2)
1130  if self.style == 'pairwise_drmsd_Q':
1131  distance = IMP.atom.get_drmsd_Q(
1132  coordinates1, coordinates2, self.threshold)
1133  if self.style == 'pairwise_rmsd':
1134  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1135  distances.append(distance)
1136 
1137  print(selection_name, "average distance",
1138  sum(distances)/len(distances), "minimum distance",
1139  min(distances), 'nframes', len(distances))
1140  ret[selection_name] = {
1141  'average_distance': sum(distances)/len(distances),
1142  'minimum_distance': min(distances)}
1143  return ret
1144 
1145  def get_coordinates(self):
1146  pass
1147 
1148  def set_precision_style(self, style):
1149  if style in self.styles:
1150  self.style = style
1151  else:
1152  raise ValueError("No such style")
1153 
1154 
1155 class GetModelDensity(object):
1156  """Compute mean density maps from structures.
1157 
1158  Keeps a dictionary of density maps,
1159  keyed by the custom range names. When you call add_subunits_density,
1160  it adds the particle coordinates to the existing density maps.
1161  """
1162 
1163  def __init__(self, custom_ranges, representation=None, resolution=20.0,
1164  voxel=5.0):
1165  """Constructor.
1166  @param custom_ranges Required. It's a dictionary, keys are the
1167  density component names, values are selection tuples
1168  e.g. {'kin28':[['kin28',1,-1]],
1169  'density_name_1' :[('ccl1')],
1170  'density_name_2' :[(1,142,'tfb3d1'),
1171  (143,700,'tfb3d2')]}
1172  @param resolution The MRC resolution of the output map (in
1173  Angstrom unit)
1174  @param voxel The voxel size for the output map (lower is slower)
1175  """
1176 
1177  if representation:
1179  "The 'representation' argument is deprecated "
1180  "(and is ignored). Pass hierarchies to "
1181  "add_subunits_density() instead.")
1182  self.MRCresolution = resolution
1183  self.voxel = voxel
1184  self.densities = {}
1185  self.count_models = 0.0
1186  self.custom_ranges = custom_ranges
1187 
1188  def add_subunits_density(self, hierarchy):
1189  """Add a frame to the densities.
1190  @param hierarchy The hierarchy to add.
1191  """
1192  self.count_models += 1.0
1193 
1194  part_dict = get_particles_at_resolution_one(hierarchy)
1195  all_particles_by_resolution = []
1196  for name in part_dict:
1197  all_particles_by_resolution += part_dict[name]
1198 
1199  for density_name in self.custom_ranges:
1200  parts = []
1201  all_particles_by_segments = []
1202 
1203  for seg in self.custom_ranges[density_name]:
1204  if isinstance(seg, str):
1205  s = IMP.atom.Selection(hierarchy, molecule=seg)
1206  elif isinstance(seg, tuple) and len(seg) == 2:
1207  s = IMP.atom.Selection(
1208  hierarchy, molecule=seg[0], copy_index=seg[1])
1209  elif isinstance(seg, tuple) and len(seg) == 3:
1210  s = IMP.atom.Selection(
1211  hierarchy, molecule=seg[2],
1212  residue_indexes=range(seg[0], seg[1] + 1))
1213  elif isinstance(seg, tuple) and len(seg) == 4:
1214  s = IMP.atom.Selection(
1215  hierarchy, molecule=seg[2],
1216  residue_indexes=range(seg[0], seg[1] + 1),
1217  copy_index=seg[3])
1218  else:
1219  raise Exception('could not understand selection tuple '
1220  + str(seg))
1221 
1222  all_particles_by_segments += s.get_selected_particles()
1223  parts = all_particles_by_segments
1224  self._create_density_from_particles(parts, density_name)
1225 
1226  def normalize_density(self):
1227  pass
1228 
1229  def _create_density_from_particles(self, ps, name,
1230  kernel_type='GAUSSIAN'):
1231  '''Internal function for adding to densities.
1232  Pass XYZR particles with mass and create a density from them.
1233  Kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.'''
1234  dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
1235  dmap.calcRMS()
1236  dmap.set_was_used(True)
1237  if name not in self.densities:
1238  self.densities[name] = dmap
1239  else:
1240  bbox1 = IMP.em.get_bounding_box(self.densities[name])
1241  bbox2 = IMP.em.get_bounding_box(dmap)
1242  bbox1 += bbox2
1243  dmap3 = IMP.em.create_density_map(bbox1, self.voxel)
1244  dmap3.set_was_used(True)
1245  dmap3.add(dmap)
1246  dmap3.add(self.densities[name])
1247  self.densities[name] = dmap3
1248 
1249  def get_density_keys(self):
1250  return list(self.densities.keys())
1251 
1252  def get_density(self, name):
1253  """Get the current density for some component name"""
1254  if name not in self.densities:
1255  return None
1256  else:
1257  return self.densities[name]
1258 
1259  def write_mrc(self, path="./", suffix=None):
1260  import os
1261  import errno
1262  for density_name in self.densities:
1263  self.densities[density_name].multiply(1. / self.count_models)
1264  if suffix is None:
1265  name = path + "/" + density_name + ".mrc"
1266  else:
1267  name = path + "/" + density_name + "." + suffix + ".mrc"
1268  path, file = os.path.split(name)
1269  if not os.path.exists(path):
1270  try:
1271  os.makedirs(path)
1272  except OSError as e:
1273  if e.errno != errno.EEXIST:
1274  raise
1275  IMP.em.write_map(self.densities[density_name], name,
1276  IMP.em.MRCReaderWriter())
1277 
1278 
1279 class GetContactMap(object):
1280 
1281  def __init__(self, distance=15.):
1282  self.distance = distance
1283  self.contactmap = ''
1284  self.namelist = []
1285  self.xlinks = 0
1286  self.XL = {}
1287  self.expanded = {}
1288  self.resmap = {}
1289 
1290  def set_prot(self, prot):
1291  self.prot = prot
1292  self.protnames = []
1293 
1294  particles_dictionary = get_particles_at_resolution_one(self.prot)
1295 
1296  for name in particles_dictionary:
1297  residue_indexes = []
1298  for p in particles_dictionary[name]:
1299  print(p.get_name())
1300  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1301 
1302  if len(residue_indexes) != 0:
1303  self.protnames.append(name)
1304 
1305  def get_subunit_coords(self, frame, align=0):
1306  from scipy.spatial.distance import cdist
1307  coords = []
1308  radii = []
1309  namelist = []
1310  for part in self.prot.get_children():
1311  SortedSegments = []
1312  print(part)
1313  for chl in part.get_children():
1314  start = IMP.atom.get_leaves(chl)[0]
1315 
1316  startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
1317  SortedSegments.append((chl, startres))
1318  SortedSegments = sorted(SortedSegments, key=itemgetter(1))
1319 
1320  for sgmnt in SortedSegments:
1321  for leaf in IMP.atom.get_leaves(sgmnt[0]):
1322  p = IMP.core.XYZR(leaf)
1323  crd = np.array([p.get_x(), p.get_y(), p.get_z()])
1324 
1325  coords.append(crd)
1326  radii.append(p.get_radius())
1327 
1328  new_name = part.get_name() + '_' + sgmnt[0].get_name() + \
1329  '_' + \
1330  str(IMP.atom.Fragment(leaf)
1331  .get_residue_indexes()[0])
1332  namelist.append(new_name)
1333  self.expanded[new_name] = len(
1334  IMP.atom.Fragment(leaf).get_residue_indexes())
1335  if part.get_name() not in self.resmap:
1336  self.resmap[part.get_name()] = {}
1337  for res in IMP.atom.Fragment(leaf).get_residue_indexes():
1338  self.resmap[part.get_name()][res] = new_name
1339 
1340  coords = np.array(coords)
1341  radii = np.array(radii)
1342  if len(self.namelist) == 0:
1343  self.namelist = namelist
1344  self.contactmap = np.zeros((len(coords), len(coords)))
1345  distances = cdist(coords, coords)
1346  distances = (distances - radii).T - radii
1347  distances = distances <= self.distance
1348  self.contactmap += distances
1349 
1350  def add_xlinks(self, filename,
1351  identification_string='ISDCrossLinkMS_Distance_'):
1352  self.xlinks = 1
1353  with open(filename) as data:
1354  D = data.readlines()
1355 
1356  for d in D:
1357  if identification_string in d:
1358  d = d.replace("_", " ").replace("-", " ").replace(
1359  ":", " ").split()
1360 
1361  t1, t2 = (d[0], d[1]), (d[1], d[0])
1362  if t1 not in self.XL:
1363  self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
1364  self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
1365  else:
1366  self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
1367  self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1368 
1369  def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
1370  K = self.namelist
1371  M = self.contactmap
1372  C, R = [], []
1373  proteins = self.protnames
1374 
1375  # build the per-protein-pair contact matrices
1376  if skip_cmap == 0:
1377  Matrices = {}
1378  proteins = [p.get_name() for p in self.prot.get_children()]
1379  missing = []
1380  for p1 in range(len(proteins)):
1381  for p2 in range(p1, len(proteins)):
1382  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1383  max(self.resmap[proteins[p2]].keys()))
1384  pn1, pn2 = proteins[p1], proteins[p2]
1385  mtr = np.zeros((pl1 + 1, pl2 + 1))
1386  print('Creating matrix for: ',
1387  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1388  for i1 in range(1, pl1 + 1):
1389  for i2 in range(1, pl2 + 1):
1390  try:
1391  r1 = K.index(self.resmap[pn1][i1])
1392  r2 = K.index(self.resmap[pn2][i2])
1393  r = M[r1, r2]
1394  mtr[i1 - 1, i2 - 1] = r
1395  except KeyError:
1396  missing.append((pn1, pn2, i1, i2))
1397  Matrices[(pn1, pn2)] = mtr
1398 
1399  # add cross-links
1400  if skip_xl == 0:
1401  if self.XL == {}:
1402  raise ValueError("cross-links were not provided, use "
1403  "add_xlinks function!")
1404  Matrices_xl = {}
1405  for p1 in range(len(proteins)):
1406  for p2 in range(p1, len(proteins)):
1407  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1408  max(self.resmap[proteins[p2]].keys()))
1409  pn1, pn2 = proteins[p1], proteins[p2]
1410  mtr = np.zeros((pl1 + 1, pl2 + 1))
1411  flg = 0
1412  try:
1413  xls = self.XL[(pn1, pn2)]
1414  except KeyError:
1415  try:
1416  xls = self.XL[(pn2, pn1)]
1417  flg = 1
1418  except KeyError:
1419  flg = 2
1420  if flg == 0:
1421  print('Creating matrix for: ',
1422  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1423  for xl1, xl2 in xls:
1424  if xl1 > pl1:
1425  print('X' * 10, xl1, xl2)
1426  xl1 = pl1
1427  if xl2 > pl2:
1428  print('X' * 10, xl1, xl2)
1429  xl2 = pl2
1430  mtr[xl1 - 1, xl2 - 1] = 100
1431  elif flg == 1:
1432  print('Creating matrix for: ',
1433  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1434  for xl1, xl2 in xls:
1435  if xl1 > pl1:
1436  print('X' * 10, xl1, xl2)
1437  xl1 = pl1
1438  if xl2 > pl2:
1439  print('X' * 10, xl1, xl2)
1440  xl2 = pl2
1441  mtr[xl2 - 1, xl1 - 1] = 100
1442  else:
1443  print('No cross-links between: ', pn1, pn2)
1444  Matrices_xl[(pn1, pn2)] = mtr
1445 
1446  # make list of protein names and create coordinate lists
1447  C = proteins
1448  # W is the component length list,
1449  # R is the contiguous coordinates list
1450  W, R = [], []
1451  for i, c in enumerate(C):
1452  cl = max(self.resmap[c].keys())
1453  W.append(cl)
1454  if i == 0:
1455  R.append(cl)
1456  else:
1457  R.append(R[-1] + cl)
1458 
1459  # start plotting
1460  if outname:
1461  # Don't require a display
1462  import matplotlib as mpl
1463  mpl.use('Agg')
1464  import matplotlib.pyplot as plt
1465  import matplotlib.gridspec as gridspec
1466  import scipy.sparse as sparse
1467 
1468  plt.figure()
1469  gs = gridspec.GridSpec(len(W), len(W),
1470  width_ratios=W,
1471  height_ratios=W)
1472 
1473  cnt = 0
1474  for x1, r1 in enumerate(R):
1475  for x2, r2 in enumerate(R):
1476  ax = plt.subplot(gs[cnt])
1477  if skip_cmap == 0:
1478  try:
1479  mtr = Matrices[(C[x1], C[x2])]
1480  except KeyError:
1481  mtr = Matrices[(C[x2], C[x1])].T
1482  ax.imshow(
1483  np.log(mtr),
1484  interpolation='nearest',
1485  vmin=0.,
1486  vmax=log(mtr.max()) if mtr.max() > 0. else 1.)
1487  ax.set_xticks([])
1488  ax.set_yticks([])
1489  if skip_xl == 0:
1490  try:
1491  mtr = Matrices_xl[(C[x1], C[x2])]
1492  except KeyError:
1493  mtr = Matrices_xl[(C[x2], C[x1])].T
1494  ax.spy(
1495  sparse.csr_matrix(mtr),
1496  markersize=10,
1497  color='white',
1498  linewidth=100,
1499  alpha=0.5)
1500  ax.set_xticks([])
1501  ax.set_yticks([])
1502 
1503  cnt += 1
1504  if x2 == 0:
1505  ax.set_ylabel(C[x1], rotation=90)
1506  if outname:
1507  plt.savefig(outname + ".pdf", dpi=300, transparent="False")
1508  else:
1509  plt.show()
1510 
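# Workflow sketch (editorial addition): the typical call order for this class,
# inferred from the methods above. The hierarchy, frame count, stat file and
# output name are hypothetical.
"""
cmap = GetContactMap(distance=15.)
cmap.set_prot(hier)                       # hierarchy with resolution-1 beads
for frame in range(number_of_frames):     # accumulate contacts over frames
    cmap.get_subunit_coords(frame)
cmap.add_xlinks("stat_file.out")          # optional: overlay cross-links
cmap.dist_matrix(skip_cmap=0, skip_xl=0, outname="contact_map")
"""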
1511 
1512 # ------------------------------------------------------------------
1513 # a few random tools
1514 
1515 def get_hiers_from_rmf(model, frame_number, rmf_file):
1516  # I have to deprecate this function
1517  print("getting coordinates for frame %i rmf file %s"
1518  % (frame_number, rmf_file))
1519 
1520  # load the frame
1521  rh = RMF.open_rmf_file_read_only(rmf_file)
1522 
1523  try:
1524  prots = IMP.rmf.create_hierarchies(rh, model)
1525  except IOError:
1526  print("Unable to open rmf file %s" % (rmf_file))
1527  return None
1528 
1529  try:
1530  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1531  except IOError:
1532  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1533  return None
1534  model.update()
1535  del rh
1536 
1537  return prots
1538 
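# Sketch (editorial addition): loading one frame of an RMF file into newly
# created hierarchies; the file name and frame index are hypothetical.
"""
model = IMP.Model()
hiers = get_hiers_from_rmf(model, 0, "output/rmfs/0.rmf3")
if hiers is not None:
    print(len(hiers), "hierarchies loaded")
"""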
1539 
1540 def link_hiers_to_rmf(model, hiers, frame_number, rmf_file):
1541  print("linking hierarchies for frame %i rmf file %s"
1542  % (frame_number, rmf_file))
1543  rh = RMF.open_rmf_file_read_only(rmf_file)
1544  IMP.rmf.link_hierarchies(rh, hiers)
1545  try:
1546  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1547  except: # noqa: E722
1548  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1549  return False
1550  model.update()
1551  del rh
1552  return True
1553 
1554 
1555 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
1556  # I have to deprecate this function
1557  print("getting coordinates for frame %i rmf file %s"
1558  % (frame_number, rmf_file))
1559 
1560  # load the frame
1561  rh = RMF.open_rmf_file_read_only(rmf_file)
1562 
1563  try:
1564  prots = IMP.rmf.create_hierarchies(rh, model)
1565  rs = IMP.rmf.create_restraints(rh, model)
1566  except IOError:
1567  print("Unable to open rmf file %s" % (rmf_file))
1568  return None, None
1569  try:
1570  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1571  except: # noqa: E722
1572  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1573  return None, None
1574  model.update()
1575  del rh
1576 
1577  return prots, rs
1578 
1579 
1580 def link_hiers_and_restraints_to_rmf(model, hiers, rs, frame_number, rmf_file):
1581  print("linking hierarchies for frame %i rmf file %s"
1582  % (frame_number, rmf_file))
1583  rh = RMF.open_rmf_file_read_only(rmf_file)
1584  IMP.rmf.link_hierarchies(rh, hiers)
1585  IMP.rmf.link_restraints(rh, rs)
1586  try:
1587  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1588  except: # noqa: E722
1589  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1590  return False
1591  model.update()
1592  del rh
1593  return True
1594 
1595 
1597  """Get particles at res 1, or any beads, based on the name.
1598  No Representation is needed. This is mainly used when the hierarchy
1599  is read from an RMF file.
1600 
1601  @return a dictionary of component names and their particles
1602  @note If the root node is named "System" or is a "State", do
1603  proper selection.
1604  """
1605  particle_dict = {}
1606 
1607  # attempt to give good results for PMI2
1608  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1609  sel = IMP.atom.Selection(mol, resolution=1)
1610  particle_dict[mol.get_name()] = sel.get_selected_particles()
1611  return particle_dict
1612 
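# Sketch (editorial addition): the returned dictionary maps molecule names to
# their resolution-1 particles; 'hier' is a hypothetical PMI2 root hierarchy.
"""
particle_dict = get_particles_at_resolution_one(hier)
for name, particles in particle_dict.items():
    print(name, len(particles))
"""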
1613 
1615  """Get particles at res 10, or any beads, based on the name.
1616  No Representation is needed.
1617  This is mainly used when the hierarchy is read from an RMF file.
1618 
1619  @return a dictionary of component names and their particles
1620  @note If the root node is named "System" or is a "State", do proper
1621  selection.
1622  """
1623  particle_dict = {}
1624  # attempt to give good results for PMI2
1625  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1626  sel = IMP.atom.Selection(mol, resolution=10)
1627  particle_dict[mol.get_name()] = sel.get_selected_particles()
1628  return particle_dict
1629 
1630 
1631 class CrossLinkTable(object):
1632  """Visualization of cross-links"""
1633  def __init__(self):
1634  self.crosslinks = []
1635  self.external_csv_data = None
1636  self.crosslinkedprots = set()
1637  self.mindist = +10000000.0
1638  self.maxdist = -10000000.0
1639  self.contactmap = None
1640 
1641  def set_hierarchy(self, prot):
1642  self.prot_length_dict = {}
1643  self.model = prot.get_model()
1644 
1645  for i in prot.get_children():
1646  name = i.get_name()
1647  residue_indexes = []
1648  for p in IMP.atom.get_leaves(i):
1649  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1650 
1651  if len(residue_indexes) != 0:
1652  self.prot_length_dict[name] = max(residue_indexes)
1653 
1654  def set_coordinates_for_contact_map(self, rmf_name, rmf_frame_index):
1655  from scipy.spatial.distance import cdist
1656 
1657  rh = RMF.open_rmf_file_read_only(rmf_name)
1658  prots = IMP.rmf.create_hierarchies(rh, self.model)
1659  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
1660  print("getting coordinates for frame %i rmf file %s"
1661  % (rmf_frame_index, rmf_name))
1662  del rh
1663 
1664  coords = []
1665  radii = []
1666 
1667  particles_dictionary = get_particles_at_resolution_one(prots[0])
1668 
1669  resindex = 0
1670  self.index_dictionary = {}
1671 
1672  for name in particles_dictionary:
1673  residue_indexes = []
1674  for p in particles_dictionary[name]:
1675  print(p.get_name())
1676  residue_indexes = IMP.pmi.tools.get_residue_indexes(p)
1677 
1678  if len(residue_indexes) != 0:
1679 
1680  for res in range(min(residue_indexes),
1681  max(residue_indexes) + 1):
1682  d = IMP.core.XYZR(p)
1683 
1684  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1685  coords.append(crd)
1686  radii.append(d.get_radius())
1687  if name not in self.index_dictionary:
1688  self.index_dictionary[name] = [resindex]
1689  else:
1690  self.index_dictionary[name].append(resindex)
1691  resindex += 1
1692 
1693  coords = np.array(coords)
1694  radii = np.array(radii)
1695 
1696  distances = cdist(coords, coords)
1697  distances = (distances - radii).T - radii
1698 
1699  distances = np.where(distances <= 20.0, 1.0, 0)
1700  if self.contactmap is None:
1701  self.contactmap = np.zeros((len(coords), len(coords)))
1702  self.contactmap += distances
1703 
1704  for prot in prots:
1705  IMP.atom.destroy(prot)
1706 
1707  def set_crosslinks(
1708  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1709  mapping=None, filter_label=None,
1710  # provide a list of rmf base names to filter the stat file
1711  filter_rmf_file_names=None, external_csv_data_file=None,
1712  external_csv_data_file_unique_id_key="Unique ID"):
1713 
1714  # example key ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None # noqa: E501
1715  # mapping is a dictionary that maps standard keywords to entry
1716  # positions in the key string
1717  # confidence class is a filter on the cross-link confidence value
1718  # external datafile is a datafile that contains further information
1719  # on the cross-links
1720  # it will use the unique id to create the dictionary keys
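  # for example, for a key of the form shown above, one possible mapping
  # (positions refer to the key after it has been split on "_", "-"
  # and ":") would be:
  #   mapping = {"Unique Identifier": 3, "Residue1": 6, "Protein1": 7,
  #              "Residue2": 8, "Protein2": 9, "Confidence": 12}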
1721 
1722  po = IMP.pmi.output.ProcessOutput(data_file)
1723  keys = po.get_keys()
1724 
1725  xl_keys = [k for k in keys if search_label in k]
1726 
1727  if filter_rmf_file_names is not None:
1728  rmf_file_key = "local_rmf_file_name"
1729  fs = po.get_fields(xl_keys+[rmf_file_key])
1730  else:
1731  fs = po.get_fields(xl_keys)
1732 
1733  # this dictionary stores the occurrence of each cross-link
1734  self.cross_link_frequency = {}
1735 
1736  # this dictionary stores the series of distances for given cross-linked
1737  # residues
1738  self.cross_link_distances = {}
1739 
1740  # this dictionary stores the series of distances for each unique
1741  # pair of cross-linked residues
1742  self.cross_link_distances_unique = {}
1743 
1744  if external_csv_data_file is not None:
1745  # this dictionary stores the further information on cross-links
1746  # labeled by unique ID
1747  self.external_csv_data = {}
1748  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1749 
1750  for xl in xldb:
1751  self.external_csv_data[
1752  xl[external_csv_data_file_unique_id_key]] = xl
1753 
1754  # this list keeps track of the cross-link tuples and samples
1755  # so that the same cross-linked residues are not counted twice in the
1756  # same sample
1757  cross_link_frequency_list = []
1758 
1759  self.unique_cross_link_list = []
1760 
1761  for key in xl_keys:
1762  print(key)
1763  keysplit = key.replace("_", " ").replace("-", " ").replace(
1764  ":", " ").split()
1765 
1766  if filter_label is not None:
1767  if filter_label not in keysplit:
1768  continue
1769 
1770  if mapping is None:
1771  r1 = int(keysplit[5])
1772  c1 = keysplit[6]
1773  r2 = int(keysplit[7])
1774  c2 = keysplit[8]
1775  try:
1776  confidence = keysplit[12]
1777  except: # noqa: E722
1778  confidence = '0.0'
1779  try:
1780  unique_identifier = keysplit[3]
1781  except: # noqa: E722
1782  unique_identifier = '0'
1783  else:
1784  r1 = int(keysplit[mapping["Residue1"]])
1785  c1 = keysplit[mapping["Protein1"]]
1786  r2 = int(keysplit[mapping["Residue2"]])
1787  c2 = keysplit[mapping["Protein2"]]
1788  try:
1789  confidence = keysplit[mapping["Confidence"]]
1790  except: # noqa: E722
1791  confidence = '0.0'
1792  try:
1793  unique_identifier = keysplit[mapping["Unique Identifier"]]
1794  except: # noqa: E722
1795  unique_identifier = '0'
1796 
1797  self.crosslinkedprots.add(c1)
1798  self.crosslinkedprots.add(c2)
1799 
1800  # construct the list of distances corresponding to the input rmf
1801  # files
1802 
1803  dists = []
1804  if filter_rmf_file_names is not None:
1805  for n, d in enumerate(fs[key]):
1806  if fs[rmf_file_key][n] in filter_rmf_file_names:
1807  dists.append(float(d))
1808  else:
1809  dists = [float(f) for f in fs[key]]
1810 
1811  # check if the input confidence class corresponds to the
1812  # one of the cross-link
1813 
1814  mdist = self.median(dists)
1815 
1816  stdv = np.std(np.array(dists))
1817  if self.mindist > mdist:
1818  self.mindist = mdist
1819  if self.maxdist < mdist:
1820  self.maxdist = mdist
1821 
1822  # calculate the frequency of unique cross-links within the same
1823  # sample
1824  if (r1, c1, r2, c2, mdist) not in cross_link_frequency_list:
1825  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1826  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1827  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1828  else:
1829  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1830  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1831  cross_link_frequency_list.append((r1, c1, r2, c2))
1832  cross_link_frequency_list.append((r2, c2, r1, c1))
1833  self.unique_cross_link_list.append(
1834  (r1, c1, r2, c2, mdist))
1835 
1836  if (r1, c1, r2, c2) not in self.cross_link_distances:
1837  self.cross_link_distances[(
1838  r1, c1, r2, c2, mdist, confidence)] = dists
1839  self.cross_link_distances[(
1840  r2, c2, r1, c1, mdist, confidence)] = dists
1841  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1842  else:
1843  self.cross_link_distances[(
1844  r2, c2, r1, c1, mdist, confidence)] += dists
1845  self.cross_link_distances[(
1846  r1, c1, r2, c2, mdist, confidence)] += dists
1847 
1848  self.crosslinks.append(
1849  (r1, c1, r2, c2, mdist, stdv, confidence, unique_identifier,
1850  'original'))
1851  self.crosslinks.append(
1852  (r2, c2, r1, c1, mdist, stdv, confidence, unique_identifier,
1853  'reversed'))
1854 
1855  self.cross_link_frequency_inverted = {}
1856  for xl in self.unique_cross_link_list:
1857  (r1, c1, r2, c2, mdist) = xl
1858  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1859  if frequency not in self.cross_link_frequency_inverted:
1860  self.cross_link_frequency_inverted[
1861  frequency] = [(r1, c1, r2, c2)]
1862  else:
1863  self.cross_link_frequency_inverted[
1864  frequency].append((r1, c1, r2, c2))
1865 
1866  # -------------
1867 
1868  def median(self, mylist):
1869  sorts = sorted(mylist)
1870  length = len(sorts)
1871  print(length)
1872  if length == 1:
1873  return mylist[0]
1874  if not length % 2:
1875  return (sorts[length // 2] + sorts[length // 2 - 1]) / 2.0
1876  return sorts[length // 2]
1877 
1878  def set_threshold(self, threshold):
1879  self.threshold = threshold
1880 
1881  def set_tolerance(self, tolerance):
1882  self.tolerance = tolerance
1883 
1884  def colormap(self, dist):
1885  if dist < self.threshold - self.tolerance:
1886  return "Green"
1887  elif dist >= self.threshold + self.tolerance:
1888  return "Orange"
1889  else:
1890  return "Red"
1891 
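  # Illustration of how set_threshold() and set_tolerance() drive the
  # color returned by colormap() (the values below are only an example):
  #   self.set_threshold(35.0); self.set_tolerance(5.0)
  #   colormap(25.0)  ->  "Green"   (below threshold - tolerance)
  #   colormap(45.0)  ->  "Orange"  (at or above threshold + tolerance)
  #   colormap(37.0)  ->  "Red"     (within the tolerance band)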
1892  def write_cross_link_database(self, filename, format='csv'):
1893  import csv
1894 
1895  fieldnames = [
1896  "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
1897  "Median Distance", "Standard Deviation", "Confidence",
1898  "Frequency", "Arrangement"]
1899 
1900  if self.external_csv_data is not None:
1901  keys = list(self.external_csv_data.keys())
1902  innerkeys = list(self.external_csv_data[keys[0]].keys())
1903  innerkeys.sort()
1904  fieldnames += innerkeys
1905 
1906  dw = csv.DictWriter(
1907  open(filename, "w"),
1908  delimiter=',',
1909  fieldnames=fieldnames)
1910  dw.writeheader()
1911  for xl in self.crosslinks:
1912  (r1, c1, r2, c2, mdist, stdv, confidence,
1913  unique_identifier, descriptor) = xl
1914  if descriptor == 'original':
1915  outdict = {}
1916  outdict["Unique ID"] = unique_identifier
1917  outdict["Protein1"] = c1
1918  outdict["Protein2"] = c2
1919  outdict["Residue1"] = r1
1920  outdict["Residue2"] = r2
1921  outdict["Median Distance"] = mdist
1922  outdict["Standard Deviation"] = stdv
1923  outdict["Confidence"] = confidence
1924  outdict["Frequency"] = self.cross_link_frequency[
1925  (r1, c1, r2, c2)]
1926  if c1 == c2:
1927  arrangement = "Intra"
1928  else:
1929  arrangement = "Inter"
1930  outdict["Arrangement"] = arrangement
1931  if self.external_csv_data is not None:
1932  outdict.update(self.external_csv_data[unique_identifier])
1933 
1934  dw.writerow(outdict)
1935 
1936  def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
1937  no_confidence_info=False, filter=None, layout="whole",
1938  crosslinkedonly=False, filename=None, confidence_classes=None,
1939  alphablend=0.1, scale_symbol_size=1.0, gap_between_components=0,
1940  rename_protein_map=None):
1941  # layout can be:
1942  # "lowerdiagonal" print only the lower diagonal plot
1943  # "upperdiagonal" print only the upper diagonal plot
1944  # "whole" print all
1945  # crosslinkedonly: plot only components that have cross-links
1946  # no_dist_info: if True will plot only the cross-links as grey spots
1947  # filter = tuple; the tuple contains a keyword to be searched
1948  # in the database,
1949  # a relationship ">", "==", "<",
1950  # and a value
1951  # example: ("ID_Score", ">", 40)
1952  # scale_symbol_size rescales the symbol size for the cross-links
1953  # rename_protein_map is a dictionary used to rename proteins
1954 
1955  import matplotlib as mpl
1956  mpl.use('Agg')
1957  import matplotlib.pyplot as plt
1958  import matplotlib.cm as cm
1959 
1960  fig = plt.figure(figsize=(10, 10))
1961  ax = fig.add_subplot(111)
1962 
1963  ax.set_xticks([])
1964  ax.set_yticks([])
1965 
1966  # set the list of proteins on the x axis
1967  if prot_listx is None:
1968  if crosslinkedonly:
1969  prot_listx = list(self.crosslinkedprots)
1970  else:
1971  prot_listx = list(self.prot_length_dict.keys())
1972  prot_listx.sort()
1973 
1974  nresx = gap_between_components + \
1975  sum([self.prot_length_dict[name]
1976  + gap_between_components for name in prot_listx])
1977 
1978  # set the list of proteins on the y axis
1979 
1980  if prot_listy is None:
1981  if crosslinkedonly:
1982  prot_listy = list(self.crosslinkedprots)
1983  else:
1984  prot_listy = list(self.prot_length_dict.keys())
1985  prot_listy.sort()
1986 
1987  nresy = gap_between_components + \
1988  sum([self.prot_length_dict[name]
1989  + gap_between_components for name in prot_listy])
1990 
1991  # this is the residue offset for each protein
1992  resoffsetx = {}
1993  resendx = {}
1994  res = gap_between_components
1995  for prot in prot_listx:
1996  resoffsetx[prot] = res
1997  res += self.prot_length_dict[prot]
1998  resendx[prot] = res
1999  res += gap_between_components
2000 
2001  resoffsety = {}
2002  resendy = {}
2003  res = gap_between_components
2004  for prot in prot_listy:
2005  resoffsety[prot] = res
2006  res += self.prot_length_dict[prot]
2007  resendy[prot] = res
2008  res += gap_between_components
2009 
2010  resoffsetdiagonal = {}
2011  res = gap_between_components
2012  for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2013  resoffsetdiagonal[prot] = res
2014  res += self.prot_length_dict[prot]
2015  res += gap_between_components
2016 
2017  # plot protein boundaries
2018 
2019  xticks = []
2020  xlabels = []
2021  for n, prot in enumerate(prot_listx):
2022  res = resoffsetx[prot]
2023  end = resendx[prot]
2024  for proty in prot_listy:
2025  resy = resoffsety[proty]
2026  endy = resendy[proty]
2027  ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
2028  ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
2029  xticks.append((float(res) + float(end)) / 2)
2030  if rename_protein_map is not None:
2031  if prot in rename_protein_map:
2032  prot = rename_protein_map[prot]
2033  xlabels.append(prot)
2034 
2035  yticks = []
2036  ylabels = []
2037  for n, prot in enumerate(prot_listy):
2038  res = resoffsety[prot]
2039  end = resendy[prot]
2040  for protx in prot_listx:
2041  resx = resoffsetx[protx]
2042  endx = resendx[protx]
2043  ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
2044  ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
2045  yticks.append((float(res) + float(end)) / 2)
2046  if rename_protein_map is not None:
2047  if prot in rename_protein_map:
2048  prot = rename_protein_map[prot]
2049  ylabels.append(prot)
2050 
2051  # plot the contact map
2052  print(prot_listx, prot_listy)
2053 
2054  if self.contactmap is not None:
2055  tmp_array = np.zeros((nresx, nresy))
2056 
2057  for px in prot_listx:
2058  print(px)
2059  for py in prot_listy:
2060  print(py)
2061  resx = resoffsetx[px]
2062  lengx = resendx[px] - 1
2063  resy = resoffsety[py]
2064  lengy = resendy[py] - 1
2065  indexes_x = self.index_dictionary[px]
2066  minx = min(indexes_x)
2067  maxx = max(indexes_x)
2068  indexes_y = self.index_dictionary[py]
2069  miny = min(indexes_y)
2070  maxy = max(indexes_y)
2071 
2072  print(px, py, minx, maxx, miny, maxy)
2073 
2074  try:
2075  tmp_array[
2076  resx:lengx,
2077  resy:lengy] = self.contactmap[
2078  minx:maxx,
2079  miny:maxy]
2080  except: # noqa: E722
2081  continue
2082 
2083  ax.imshow(tmp_array,
2084  cmap=cm.binary,
2085  origin='lower',
2086  interpolation='nearest')
2087 
2088  ax.set_xticks(xticks)
2089  ax.set_xticklabels(xlabels, rotation=90)
2090  ax.set_yticks(yticks)
2091  ax.set_yticklabels(ylabels)
2092  ax.set_xlim(0, nresx)
2093  ax.set_ylim(0, nresy)
2094 
2095  # set the cross-links
2096 
2097  already_added_xls = []
2098 
2099  for xl in self.crosslinks:
2100 
2101  (r1, c1, r2, c2, mdist, stdv, confidence,
2102  unique_identifier, descriptor) = xl
2103 
2104  if confidence_classes is not None:
2105  if confidence not in confidence_classes:
2106  continue
2107 
2108  try:
2109  pos1 = r1 + resoffsetx[c1]
2110  except: # noqa: E722
2111  continue
2112  try:
2113  pos2 = r2 + resoffsety[c2]
2114  except: # noqa: E722
2115  continue
2116 
2117  if filter is not None:
2118  xldb = self.external_csv_data[unique_identifier]
2119  xldb.update({"Distance": mdist})
2120  xldb.update({"Distance_stdv": stdv})
2121 
2122  if filter[1] == ">":
2123  if float(xldb[filter[0]]) <= float(filter[2]):
2124  continue
2125 
2126  if filter[1] == "<":
2127  if float(xldb[filter[0]]) >= float(filter[2]):
2128  continue
2129 
2130  if filter[1] == "==":
2131  if float(xldb[filter[0]]) != float(filter[2]):
2132  continue
2133 
2134  # everything below is used for plotting the diagonal
2135  # when the plot is rectangular
2136 
2137  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
2138  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
2139 
2140  if layout == 'lowerdiagonal':
2141  if pos_for_diagonal1 <= pos_for_diagonal2:
2142  continue
2143  if layout == 'upperdiagonal':
2144  if pos_for_diagonal1 >= pos_for_diagonal2:
2145  continue
2146 
2147  already_added_xls.append((r1, c1, r2, c2))
2148 
2149  if not no_confidence_info:
2150  if confidence == '0.01':
2151  markersize = 14 * scale_symbol_size
2152  elif confidence == '0.05':
2153  markersize = 9 * scale_symbol_size
2154  elif confidence == '0.1':
2155  markersize = 6 * scale_symbol_size
2156  else:
2157  markersize = 15 * scale_symbol_size
2158  else:
2159  markersize = 5 * scale_symbol_size
2160 
2161  if not no_dist_info:
2162  color = self.colormap(mdist)
2163  else:
2164  color = "gray"
2165 
2166  ax.plot(
2167  [pos1],
2168  [pos2],
2169  'o',
2170  c=color,
2171  alpha=alphablend,
2172  markersize=markersize)
2173 
2174  fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
2175 
2176  for i in ax.spines.values():
2177  i.set_linewidth(2.0)
2178 
2179  if filename:
2180  plt.savefig(filename + ".pdf", dpi=300, transparent=False)
2181  else:
2182  plt.show()
2183 
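  # A minimal end-to-end sketch of this class (the stat file, RMF file and
  # output names are illustrative):
  #   ct = CrossLinkTable()
  #   ct.set_hierarchy(prot)          # prot: root hierarchy of the model
  #   ct.set_coordinates_for_contact_map("cluster.0/0.rmf3", 0)
  #   ct.set_crosslinks("stat.out")
  #   ct.set_threshold(35.0)
  #   ct.set_tolerance(5.0)
  #   ct.plot(crosslinkedonly=True, filename="xl_map")
  #   ct.write_cross_link_database("crosslinks.csv")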
2184  def get_frequency_statistics(self, prot_list,
2185  prot_list2=None):
2186 
2187  violated_histogram = {}
2188  satisfied_histogram = {}
2189  unique_cross_links = []
2190 
2191  for xl in self.unique_cross_link_list:
2192  (r1, c1, r2, c2, mdist) = xl
2193 
2194  # here we filter by the protein
2195  if prot_list2 is None:
2196  if c1 not in prot_list:
2197  continue
2198  if c2 not in prot_list:
2199  continue
2200  else:
2201  if c1 in prot_list and c2 in prot_list2:
2202  pass
2203  elif c1 in prot_list2 and c2 in prot_list:
2204  pass
2205  else:
2206  continue
2207 
2208  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2209 
2210  if (r1, c1, r2, c2) not in unique_cross_links:
2211  if mdist > 35.0:
2212  if frequency not in violated_histogram:
2213  violated_histogram[frequency] = 1
2214  else:
2215  violated_histogram[frequency] += 1
2216  else:
2217  if frequency not in satisfied_histogram:
2218  satisfied_histogram[frequency] = 1
2219  else:
2220  satisfied_histogram[frequency] += 1
2221  unique_cross_links.append((r1, c1, r2, c2))
2222  unique_cross_links.append((r2, c2, r1, c1))
2223 
2224  print("# satisfied")
2225 
2226  total_number_of_crosslinks = 0
2227 
2228  for i in satisfied_histogram:
2229  # if i in violated_histogram:
2230  # print i, satisfied_histogram[i]+violated_histogram[i]
2231  # else:
2232  if i in violated_histogram:
2233  print(i, violated_histogram[i]+satisfied_histogram[i])
2234  else:
2235  print(i, satisfied_histogram[i])
2236  total_number_of_crosslinks += i*satisfied_histogram[i]
2237 
2238  print("# violated")
2239 
2240  for i in violated_histogram:
2241  print(i, violated_histogram[i])
2242  total_number_of_crosslinks += i*violated_histogram[i]
2243 
2244  print(total_number_of_crosslinks)
2245 
2246  def print_cross_link_binary_symbols(self, prot_list,
2247  prot_list2=None):
2248  tmp_matrix = []
2249  confidence_list = []
2250  for xl in self.crosslinks:
2251  (r1, c1, r2, c2, mdist, stdv, confidence,
2252  unique_identifier, descriptor) = xl
2253 
2254  if prot_list2 is None:
2255  if c1 not in prot_list:
2256  continue
2257  if c2 not in prot_list:
2258  continue
2259  else:
2260  if c1 in prot_list and c2 in prot_list2:
2261  pass
2262  elif c1 in prot_list2 and c2 in prot_list:
2263  pass
2264  else:
2265  continue
2266 
2267  if descriptor != "original":
2268  continue
2269 
2270  confidence_list.append(confidence)
2271 
2272  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2273  tmp_dist_binary = []
2274  for d in dists:
2275  if d < 35:
2276  tmp_dist_binary.append(1)
2277  else:
2278  tmp_dist_binary.append(0)
2279  tmp_matrix.append(tmp_dist_binary)
2280 
2281  matrix = list(zip(*tmp_matrix))
2282 
2283  satisfied_high_sum = 0
2284  satisfied_mid_sum = 0
2285  satisfied_low_sum = 0
2286  total_satisfied_sum = 0
2287  for k, m in enumerate(matrix):
2288  satisfied_high = 0
2289  total_high = 0
2290  satisfied_mid = 0
2291  total_mid = 0
2292  satisfied_low = 0
2293  total_low = 0
2294  total_satisfied = 0
2295  total = 0
2296  for n, b in enumerate(m):
2297  if confidence_list[n] == "0.01":
2298  total_high += 1
2299  if b == 1:
2300  satisfied_high += 1
2301  satisfied_high_sum += 1
2302  elif confidence_list[n] == "0.05":
2303  total_mid += 1
2304  if b == 1:
2305  satisfied_mid += 1
2306  satisfied_mid_sum += 1
2307  elif confidence_list[n] == "0.1":
2308  total_low += 1
2309  if b == 1:
2310  satisfied_low += 1
2311  satisfied_low_sum += 1
2312  if b == 1:
2313  total_satisfied += 1
2314  total_satisfied_sum += 1
2315  total += 1
2316  print(k, satisfied_high, total_high)
2317  print(k, satisfied_mid, total_mid)
2318  print(k, satisfied_low, total_low)
2319  print(k, total_satisfied, total)
2320  print(float(satisfied_high_sum) / len(matrix))
2321  print(float(satisfied_mid_sum) / len(matrix))
2322  print(float(satisfied_low_sum) / len(matrix))
2323 # ------------
2324 
2325  def get_unique_crosslinks_statistics(self, prot_list,
2326  prot_list2=None):
2327 
2328  print(prot_list)
2329  print(prot_list2)
2330  satisfied_high = 0
2331  total_high = 0
2332  satisfied_mid = 0
2333  total_mid = 0
2334  satisfied_low = 0
2335  total_low = 0
2336  total = 0
2337  tmp_matrix = []
2338  satisfied_string = []
2339  for xl in self.crosslinks:
2340  (r1, c1, r2, c2, mdist, stdv, confidence,
2341  unique_identifier, descriptor) = xl
2342 
2343  if prot_list2 is None:
2344  if c1 not in prot_list:
2345  continue
2346  if c2 not in prot_list:
2347  continue
2348  else:
2349  if c1 in prot_list and c2 in prot_list2:
2350  pass
2351  elif c1 in prot_list2 and c2 in prot_list:
2352  pass
2353  else:
2354  continue
2355 
2356  if descriptor != "original":
2357  continue
2358 
2359  total += 1
2360  if confidence == "0.01":
2361  total_high += 1
2362  if mdist <= 35:
2363  satisfied_high += 1
2364  if confidence == "0.05":
2365  total_mid += 1
2366  if mdist <= 35:
2367  satisfied_mid += 1
2368  if confidence == "0.1":
2369  total_low += 1
2370  if mdist <= 35:
2371  satisfied_low += 1
2372  if mdist <= 35:
2373  satisfied_string.append(1)
2374  else:
2375  satisfied_string.append(0)
2376 
2377  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2378  tmp_dist_binary = []
2379  for d in dists:
2380  if d < 35:
2381  tmp_dist_binary.append(1)
2382  else:
2383  tmp_dist_binary.append(0)
2384  tmp_matrix.append(tmp_dist_binary)
2385 
2386  print("unique satisfied_high/total_high", satisfied_high, "/",
2387  total_high)
2388  print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
2389  print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
2390  print("total", total)
2391 
2392  matrix = list(zip(*tmp_matrix))
2393  satisfied_models = 0
2394  satstr = ""
2395  for b in satisfied_string:
2396  if b == 0:
2397  satstr += "-"
2398  if b == 1:
2399  satstr += "*"
2400 
2401  for m in matrix:
2402  all_satisfied = True
2403  string = ""
2404  for n, b in enumerate(m):
2405  if b == 0:
2406  string += "0"
2407  if b == 1:
2408  string += "1"
2409  if b == 1 and satisfied_string[n] == 1:
2410  pass
2411  elif b == 1 and satisfied_string[n] == 0:
2412  pass
2413  elif b == 0 and satisfied_string[n] == 0:
2414  pass
2415  elif b == 0 and satisfied_string[n] == 1:
2416  all_satisfied = False
2417  if all_satisfied:
2418  satisfied_models += 1
2419  print(string)
2420  print(satstr, all_satisfied)
2421  print("models that satisfies the median satisfied cross-links/total "
2422  "models", satisfied_models, len(matrix))
2423 
2424  def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
2425  prot_list2=None):
2426 
2427  import matplotlib as mpl
2428  mpl.use('Agg')
2429  import matplotlib.pylab as pl
2430 
2431  tmp_matrix = []
2432  for kw in self.cross_link_distances_unique:
2433  (r1, c1, r2, c2) = kw
2434  dists = self.cross_link_distances_unique[kw]
2435 
2436  if prot_list2 is None:
2437  if c1 not in prot_list:
2438  continue
2439  if c2 not in prot_list:
2440  continue
2441  else:
2442  if c1 in prot_list and c2 in prot_list2:
2443  pass
2444  elif c1 in prot_list2 and c2 in prot_list:
2445  pass
2446  else:
2447  continue
2448  # append the sum of dists to order by that in the matrix plot
2449  dists.append(sum(dists))
2450  tmp_matrix.append(dists)
2451 
2452  tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
2453 
2454  # print len(tmp_matrix), len(tmp_matrix[0])-1
2455  matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
2456 
2457  for i in range(len(tmp_matrix)):
2458  for k in range(len(tmp_matrix[i]) - 1):
2459  matrix[i][k] = tmp_matrix[i][k]
2460 
2461  print(matrix)
2462 
2463  fig = pl.figure()
2464  ax = fig.add_subplot(211)
2465 
2466  cax = ax.imshow(matrix, interpolation='nearest')
2467  fig.colorbar(cax)
2468  pl.savefig(figurename, dpi=300)
2469  pl.show()
2470 
2471  def plot_bars(
2472  self,
2473  filename,
2474  prots1,
2475  prots2,
2476  nxl_per_row=20,
2477  arrangement="inter",
2478  confidence_input="None"):
2479 
2480  data = []
2481  for xl in self.cross_link_distances:
2482  (r1, c1, r2, c2, mdist, confidence) = xl
2483  if c1 in prots1 and c2 in prots2:
2484  if arrangement == "inter" and c1 == c2:
2485  continue
2486  if arrangement == "intra" and c1 != c2:
2487  continue
2488  if confidence_input == confidence:
2489  label = str(c1) + ":" + str(r1) + \
2490  "-" + str(c2) + ":" + str(r2)
2491  values = self.cross_link_distances[xl]
2492  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2493  data.append((label, values, mdist, frequency))
2494 
2495  sort_by_dist = sorted(data, key=lambda tup: tup[2])
2496  sort_by_dist = list(zip(*sort_by_dist))
2497  values = sort_by_dist[1]
2498  positions = list(range(len(values)))
2499  labels = sort_by_dist[0]
2500  frequencies = list(map(float, sort_by_dist[3]))
2501  frequencies = [f * 10.0 for f in frequencies]
2502 
2503  nchunks = int(float(len(values)) / nxl_per_row)
2504  values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
2505  positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
2506  positions,
2507  nchunks)
2508  frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
2509  frequencies,
2510  nchunks)
2511  labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)
2512 
2513  for n, v in enumerate(values_chunks):
2514  p = positions_chunks[n]
2515  f = frequencies_chunks[n]
2517  filename + "." + str(n), v, p, f,
2518  valuename="Distance (Ang)",
2519  positionname="Unique " + arrangement + " Crosslinks",
2520  xlabels=labels_chunks[n])
2521 
2522  def crosslink_distance_histogram(self, filename,
2523  prot_list=None,
2524  prot_list2=None,
2525  confidence_classes=None,
2526  bins=40,
2527  color='#66CCCC',
2528  yplotrange=[0, 1],
2529  format="png",
2530  normalized=False):
2531  if prot_list is None:
2532  prot_list = list(self.prot_length_dict.keys())
2533 
2534  distances = []
2535  for xl in self.crosslinks:
2536  (r1, c1, r2, c2, mdist, stdv, confidence,
2537  unique_identifier, descriptor) = xl
2538 
2539  if confidence_classes is not None:
2540  if confidence not in confidence_classes:
2541  continue
2542 
2543  if prot_list2 is None:
2544  if c1 not in prot_list:
2545  continue
2546  if c2 not in prot_list:
2547  continue
2548  else:
2549  if c1 in prot_list and c2 in prot_list2:
2550  pass
2551  elif c1 in prot_list2 and c2 in prot_list:
2552  pass
2553  else:
2554  continue
2555 
2556  distances.append(mdist)
2557 
2558  IMP.pmi.output.plot_field_histogram(
2559  filename, distances, valuename="C-alpha C-alpha distance [Ang]",
2560  bins=bins, color=color,
2561  format=format,
2562  reference_xline=35.0,
2563  yplotrange=yplotrange, normalized=normalized)
2564 
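  # For example (output name and selections are illustrative):
  #   ct.crosslink_distance_histogram("xl_histogram",
  #                                   prot_list=["RPS3", "RPS30"],
  #                                   confidence_classes=["0.01", "0.05"],
  #                                   bins=50, yplotrange=[0, 0.2])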
2565  def scatter_plot_xl_features(self, filename,
2566  feature1=None,
2567  feature2=None,
2568  prot_list=None,
2569  prot_list2=None,
2570  yplotrange=None,
2571  reference_ylines=None,
2572  distance_color=True,
2573  format="png"):
2574  import matplotlib as mpl
2575  mpl.use('Agg')
2576  import matplotlib.pyplot as plt
2577 
2578  fig = plt.figure(figsize=(10, 10))
2579  ax = fig.add_subplot(111)
2580 
2581  for xl in self.crosslinks:
2582  (r1, c1, r2, c2, mdist, stdv, confidence,
2583  unique_identifier, arrangement) = xl
2584 
2585  if prot_list2 is None:
2586  if c1 not in prot_list:
2587  continue
2588  if c2 not in prot_list:
2589  continue
2590  else:
2591  if c1 in prot_list and c2 in prot_list2:
2592  pass
2593  elif c1 in prot_list2 and c2 in prot_list:
2594  pass
2595  else:
2596  continue
2597 
2598  xldb = self.external_csv_data[unique_identifier]
2599  xldb.update({"Distance": mdist})
2600  xldb.update({"Distance_stdv": stdv})
2601 
2602  xvalue = float(xldb[feature1])
2603  yvalue = float(xldb[feature2])
2604 
2605  if distance_color:
2606  color = self.colormap(mdist)
2607  else:
2608  color = "gray"
2609 
2610  ax.plot([xvalue], [yvalue], 'o', c=color, alpha=0.1, markersize=7)
2611 
2612  if yplotrange is not None:
2613  ax.set_ylim(yplotrange)
2614  if reference_ylines is not None:
2615  for rl in reference_ylines:
2616  ax.axhline(rl, color='red', linestyle='dashed', linewidth=1)
2617 
2618  if filename:
2619  plt.savefig(filename + "." + format, dpi=150, transparent=False)
2620 
2621  plt.show()