# IMP logo
# IMP Reference Guide  develop.7f24702912,2026/04/21
# The Integrative Modeling Platform
# pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 import IMP
7 import IMP.algebra
8 import IMP.em
9 import IMP.pmi
10 import IMP.pmi.tools
11 import IMP.pmi.output
12 import IMP.rmf
13 import RMF
14 import IMP.pmi.analysis
15 from operator import itemgetter
16 from math import log, sqrt
17 import itertools
18 import numpy as np
19 
20 
class Alignment:
    """Performs alignment and RMSD calculation for two sets of coordinates

    The class also takes into account non-equal stoichiometry of the proteins.
    If this is the case, the protein names of proteins in multiple copies
    should be specified in the following form:
    nameA..1, nameA..2 (note two dots).
    """

    def __init__(self, template, query, weights=None):
        """Constructor.
        @param query {'p1':coords(L,3), 'p2':coords(L,3)}
        @param template {'p1':coords(L,3), 'p2':coords(L,3)}
        @param weights optional weights for each set of coordinates
        """
        self.query = query
        self.template = template
        self.weights = weights

        if len(self.query.keys()) != len(self.template.keys()):
            raise ValueError('''the number of proteins
                               in template and query does not match!''')

    def permute(self):
        # get unique protein names for each protein
        # this is, unfortunately, expecting that names are all 'Molname..X'
        # where X is different for each copy.
        self.proteins = sorted(self.query.keys())
        prots_uniq = [i.split('..')[0] for i in self.proteins]

        # for each unique name, store list of permutations
        # e.g. for keys A..1,A..2 store P[A] = [[A..1,A..2],[A..2,A..1]]
        # then store the product: [[[A1,A2],[B1,B2]],[[A1,A2],[B2,B1]],
        # [[A2,A1],[B1,B2]],[[A2,A1],[B2,B1]]]
        P = {}
        for p in prots_uniq:
            copies = [i for i in self.proteins if i.split('..')[0] == p]
            prmts = list(itertools.permutations(copies, len(copies)))
            P[p] = prmts
        self.P = P
        self.Product = list(itertools.product(*P.values()))

    def get_rmsd(self):
        """Return the minimum (optionally weighted) RMSD over all copy
        permutations, without aligning."""
        self.permute()

        template_xyz = []
        weights = []
        torder = sum([list(i) for i in self.Product[0]], [])
        for t in torder:
            template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
            if self.weights is not None:
                weights += [i for i in self.weights[t]]

        self.rmsd = 10000000000.
        for comb in self.Product:

            order = sum([list(i) for i in comb], [])
            query_xyz = []
            for p in order:
                query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]

            if self.weights is not None:
                # restored: this call line was lost in the extracted text
                dist = IMP.algebra.get_weighted_rmsd(
                    template_xyz, query_xyz, weights)
            else:
                dist = IMP.algebra.get_rmsd(template_xyz, query_xyz)
            if dist < self.rmsd:
                self.rmsd = dist
        return self.rmsd

    def align(self):
        """Align query onto template over all copy permutations.

        @return (best_rmsd, transformation) where transformation maps the
                query coordinates onto the template
        """
        from scipy.spatial.distance import cdist

        self.permute()

        # create flat coordinate list from template in standard order
        # then loop through the permutations and try to align and get RMSD
        # should allow you to try all mappings within each protein
        template_xyz = []
        torder = sum([list(i) for i in self.Product[0]], [])
        for t in torder:
            template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
        self.rmsd, Transformation = 10000000000., ''

        # then for each permutation, get flat list of coords and get RMSD
        for comb in self.Product:
            order = sum([list(i) for i in comb], [])
            query_xyz = []
            for p in order:
                query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]

            if len(template_xyz) != len(query_xyz):
                raise ValueError('''the number of coordinates
                                   in template and query does not match!''')

            # restored: the aligning call was lost in the extracted text
            transformation = \
                IMP.algebra.get_transformation_aligning_first_to_second(
                    query_xyz, template_xyz)
            query_xyz_tr = [transformation.get_transformed(n)
                            for n in query_xyz]

            dist = sqrt(
                sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2))
                / len(template_xyz))
            if dist < self.rmsd:
                self.rmsd = dist
                Transformation = transformation

        # return the transformation
        return (self.rmsd, Transformation)
133 
134 
# TEST for the alignment ###
# NOTE(review): disabled Python-2-era self-test, kept as an unused
# module-level string literal (note the `print` statements and the 2-D
# coordinates); it would need porting before it could actually run.
"""
Proteins = {'a..1':np.array([np.array([-1.,1.])]),
            'a..2':np.array([np.array([1.,1.,])]),
            'a..3':np.array([np.array([-2.,1.])]),
            'b':np.array([np.array([0.,-1.])]),
            'c..1':np.array([np.array([-1.,-1.])]),
            'c..2':np.array([np.array([1.,-1.])]),
            'd':np.array([np.array([0.,0.])]),
            'e':np.array([np.array([0.,1.])])}

Ali = Alignment(Proteins, Proteins)
Ali.permute()
if Ali.get_rmsd() == 0.0: print 'successful test!'
else: print 'ERROR!'; exit()
"""
151 
152 
153 # ----------------------------------
class Violations:
    """Count how often named restraints exceed their thresholds.

    The threshold file contains one restraint per line:
    `<restraint name> <threshold>`.
    """

    def __init__(self, filename):
        """Read restraint names and thresholds from `filename`."""
        self.violation_thresholds = {}
        self.violation_counts = {}

        with open(filename) as data:
            for line in data:
                fields = line.strip().split()
                self.violation_thresholds[fields[0]] = float(fields[1])

    def get_number_violated_restraints(self, rsts_dict):
        """Return how many known restraints in `rsts_dict` exceed their
        thresholds; also accumulates per-restraint violation counts."""
        num_violated = 0
        for rst, threshold in self.violation_thresholds.items():
            if rst not in rsts_dict:
                continue  # restraint absent from this score dictionary
            if float(rsts_dict[rst]) > threshold:
                num_violated += 1
                self.violation_counts[rst] = \
                    self.violation_counts.get(rst, 0) + 1
        return num_violated
180 
181 
183  """A class to cluster structures.
184  Uses scipy's cdist function to compute distance matrices
185  and sklearn's kmeans clustering module.
186  """
187  def __init__(self, rmsd_weights=None):
188  """Constructor.
189  @param rmsd_weights Flat list of weights for each particle
190  (if they're coarse)
191  """
192  try:
193  from mpi4py import MPI
194  self.comm = MPI.COMM_WORLD
195  self.rank = self.comm.Get_rank()
196  self.number_of_processes = self.comm.size
197  except ImportError:
198  self.number_of_processes = 1
199  self.rank = 0
200  self.all_coords = {}
201  self.structure_cluster_ids = None
202  self.tmpl_coords = None
203  self.rmsd_weights = rmsd_weights
204 
    def set_template(self, part_coords):
        """Store the reference coordinates used when aligning models."""
        self.tmpl_coords = part_coords
207 
    def fill(self, frame, Coords):
        """Add coordinates for a single model.

        @param frame unique label for this model (used as the dict key)
        @param Coords coordinates for this model, keyed by protein name
        """
        self.all_coords[frame] = Coords
211 
    def dist_matrix(self):
        """Compute the all-vs-all distance matrix of the stored models.

        Unique model pairs are split across MPI processes (if any); the
        per-pair distances and alignment transformations are gathered
        back before the symmetric matrix is filled in. Results are stored
        in self.raw_distance_matrix and self.transformation_distance_dict.
        """
        self.model_list_names = list(self.all_coords.keys())
        self.model_indexes = list(range(len(self.model_list_names)))
        self.model_indexes_dict = dict(
            list(zip(self.model_list_names, self.model_indexes)))
        model_indexes_unique_pairs = \
            list(itertools.combinations(self.model_indexes, 2))

        # each rank works only on its own chunk of the pair list
        my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
            model_indexes_unique_pairs,
            self.number_of_processes)[self.rank]

        print("process %s assigned with %s pairs"
              % (str(self.rank), str(len(my_model_indexes_unique_pairs))))

        (raw_distance_dict, self.transformation_distance_dict) = \
            self.matrix_calculation(self.all_coords, self.tmpl_coords,
                                    my_model_indexes_unique_pairs)

        if self.number_of_processes > 1:
            # merge per-rank results; transformations must be converted
            # to a picklable form before being exchanged over MPI
            raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
                raw_distance_dict)
            pickable_transformations = \
                self.get_pickable_transformation_distance_dict()
            pickable_transformations = IMP.pmi.tools.scatter_and_gather(
                pickable_transformations)
            self.set_transformation_distance_dict_from_pickable(
                pickable_transformations)

        # fill the symmetric matrix (the dict holds both (f1,f2) and (f2,f1))
        self.raw_distance_matrix = np.zeros(
            (len(self.model_list_names), len(self.model_list_names)))
        for item in raw_distance_dict:
            (f1, f2) = item
            self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
            self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
247 
    def get_dist_matrix(self):
        """Return the raw distance matrix computed by dist_matrix()."""
        return self.raw_distance_matrix
250 
    def do_cluster(self, number_of_clusters, seed=None):
        """Run K-means clustering
        @param number_of_clusters Num means
        @param seed the random seed
        """
        from sklearn.cluster import KMeans
        if seed is not None:
            np.random.seed(seed)
        try:
            # check whether we have the right version of sklearn
            kmeans = KMeans(n_clusters=number_of_clusters)
        except TypeError:
            # sklearn older than 0.12
            kmeans = KMeans(k=number_of_clusters)
        # NOTE(review): clusters on the distance matrix rows directly
        # (each model's distance profile), not on raw coordinates
        kmeans.fit_predict(self.raw_distance_matrix)

        # one label per model, in model_list_names order
        self.structure_cluster_ids = kmeans.labels_
268 
269  def get_pickable_transformation_distance_dict(self):
270  pickable_transformations = {}
271  for label in self.transformation_distance_dict:
272  tr = self.transformation_distance_dict[label]
273  trans = tuple(tr.get_translation())
274  rot = tuple(tr.get_rotation().get_quaternion())
275  pickable_transformations[label] = (rot, trans)
276  return pickable_transformations
277 
278  def set_transformation_distance_dict_from_pickable(
279  self,
280  pickable_transformations):
281  self.transformation_distance_dict = {}
282  for label in pickable_transformations:
283  tr = pickable_transformations[label]
284  trans = IMP.algebra.Vector3D(tr[1])
285  rot = IMP.algebra.Rotation3D(tr[0])
286  self.transformation_distance_dict[
287  label] = IMP.algebra.Transformation3D(rot, trans)
288 
    def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
        """Save clustering state to disk.

        Writes <file_name>.data (pickle of cluster ids, model names and
        picklable transformations) and <file_name>.npy (the raw distance
        matrix).
        """
        import pickle

        # to pickle the transformation dictionary
        # you have to save the arrays corresponding to
        # the transformations

        pickable_transformations = \
            self.get_pickable_transformation_distance_dict()

        with open(file_name + ".data", 'wb') as outf:
            pickle.dump(
                (self.structure_cluster_ids,
                 self.model_list_names,
                 pickable_transformations),
                outf)

        np.save(file_name + ".npy", self.raw_distance_matrix)
307 
    def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
        """Load state previously written by save_distance_matrix_file()."""
        import pickle

        with open(file_name + ".data", 'rb') as inputf:
            (self.structure_cluster_ids, self.model_list_names,
             pickable_transformations) = pickle.load(inputf)

        self.raw_distance_matrix = np.load(file_name + ".npy")

        self.set_transformation_distance_dict_from_pickable(
            pickable_transformations)
        # rebuild the name -> index lookup to match the loaded model list
        self.model_indexes = list(range(len(self.model_list_names)))
        self.model_indexes_dict = dict(
            list(zip(self.model_list_names, self.model_indexes)))
322 
    def plot_matrix(self, figurename="clustermatrix.pdf"):
        """Plot a dendrogram and the distance matrix (rows/columns ordered
        by the dendrogram leaves) and save the figure to `figurename`."""
        import matplotlib as mpl
        mpl.use('Agg')  # headless backend: render straight to file
        import matplotlib.pylab as pl
        from scipy.cluster import hierarchy as hrc

        fig = pl.figure(figsize=(10, 8))
        ax = fig.add_subplot(212)
        dendrogram = hrc.dendrogram(
            hrc.linkage(self.raw_distance_matrix),
            color_threshold=7,
            no_labels=True)
        leaves_order = dendrogram['leaves']
        ax.set_xlabel('Model')
        ax.set_ylabel('RMSD [Angstroms]')

        ax2 = fig.add_subplot(221)
        # reorder rows and columns so clustered models appear together
        cax = ax2.imshow(
            self.raw_distance_matrix[leaves_order,
                                     :][:,
                                        leaves_order],
            interpolation='nearest')
        cb = fig.colorbar(cax)
        cb.set_label('RMSD [Angstroms]')
        ax2.set_xlabel('Model')
        ax2.set_ylabel('Model')

        pl.savefig(figurename, dpi=300)
        pl.close(fig)
352 
    def get_model_index_from_name(self, name):
        """Return the integer index of the model with the given name."""
        return self.model_indexes_dict[name]
355 
    def get_cluster_labels(self):
        """Return the distinct cluster labels (order is arbitrary,
        since they come from a set)."""
        return list(set(self.structure_cluster_ids))
359 
    def get_number_of_clusters(self):
        """Return the number of distinct clusters."""
        return len(self.get_cluster_labels())
362 
363  def get_cluster_label_indexes(self, label):
364  return (
365  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
366  )
367 
    def get_cluster_label_names(self, label):
        """Return the model names assigned to cluster `label`."""
        return (
            [self.model_list_names[i]
             for i in self.get_cluster_label_indexes(label)]
        )
373 
374  def get_cluster_label_average_rmsd(self, label):
375 
376  indexes = self.get_cluster_label_indexes(label)
377 
378  if len(indexes) > 1:
379  sub_distance_matrix = self.raw_distance_matrix[
380  indexes, :][:, indexes]
381  average_rmsd = np.sum(sub_distance_matrix) / \
382  (len(sub_distance_matrix)
383  ** 2 - len(sub_distance_matrix))
384  else:
385  average_rmsd = 0.0
386  return average_rmsd
387 
    def get_cluster_label_size(self, label):
        """Return the number of models assigned to cluster `label`."""
        return len(self.get_cluster_label_indexes(label))
390 
    def get_transformation_to_first_member(
            self,
            cluster_label,
            structure_index):
        """Return the transformation mapping model `structure_index` onto
        the first member of cluster `cluster_label`."""
        reference = self.get_cluster_label_indexes(cluster_label)[0]
        return self.transformation_distance_dict[(reference, structure_index)]
397 
398  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
399 
400  model_list_names = list(all_coords.keys())
401  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
402  raw_distance_dict = {}
403  transformation_distance_dict = {}
404  if template_coords is None:
405  do_alignment = False
406  else:
407  do_alignment = True
408  alignment_template_protein_names = list(template_coords.keys())
409 
410  for (f1, f2) in list_of_pairs:
411 
412  if not do_alignment:
413  # here we only get the rmsd,
414  # we need that for instance when you want to cluster
415  # conformations globally, eg the EM map is a reference
417 
418  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
419  for pr in rmsd_protein_names])
420  coords_f2 = {}
421  for pr in rmsd_protein_names:
422  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
423 
424  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
425  rmsd = Ali.get_rmsd()
426 
427  elif do_alignment:
428  # here we actually align the conformations first
429  # and than calculate the rmsd. We need that when the
430  # protein(s) is the reference
431  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
432  for pr in alignment_template_protein_names])
433  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
434  for pr in alignment_template_protein_names])
435 
436  Ali = Alignment(coords_f1, coords_f2)
437  template_rmsd, transformation = Ali.align()
438 
439  # here we calculate the rmsd
440  # we will align two models based n the number of subunits
441  # provided and transform coordinates of model 2 to model 1
442  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
443  for pr in rmsd_protein_names])
444  coords_f2 = {}
445  for pr in rmsd_protein_names:
446  coords_f2[pr] = [transformation.get_transformed(
447  i) for i in all_coords[model_list_names[f2]][pr]]
448 
449  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
450  rmsd = Ali.get_rmsd()
451 
452  raw_distance_dict[(f1, f2)] = rmsd
453  raw_distance_dict[(f2, f1)] = rmsd
454  transformation_distance_dict[(f1, f2)] = transformation
455  transformation_distance_dict[(f2, f1)] = transformation
456 
457  return raw_distance_dict, transformation_distance_dict
458 
459 
class RMSD:
    """Compute the RMSD (without alignment) taking into account the copy
    ambiguity.
    To be used with pmi2 hierarchies. Can be used for instance as follows:

    rmsd = IMP.pmi.analysis.RMSD(
        hier, hier, [mol.get_name() for mol in mols],
        dynamic0=True, dynamic1=False)
    output_objects.append(rmsd)

    before shuffling the coordinates
    """

    def __init__(self, hier0, hier1, molnames, label="None", dynamic0=True,
                 dynamic1=True, metric=IMP.algebra.get_rmsd):
        """
        @param hier0 first input hierarchy
        @param hier1 second input hierarchy
        @param molnames the names of the molecules used for the RMSD
        @param label label used to qualify the keys returned by get_output()
        @param dynamic0 if True stores the decorators XYZ and coordinates of
               hier0 can be updated. If false coordinates are static
               (stored in Vector3Ds) and will never be updated
        @param dynamic1 same as above
        @param metric what metric should be used
        """
        self.moldict0, self.molcoords0, self.mol_XYZs0 = \
            self.get_moldict_coorddict(hier0, molnames)
        self.moldict1, self.molcoords1, self.mol_XYZs1 = \
            self.get_moldict_coorddict(hier1, molnames)
        self.dynamic0 = dynamic0
        self.dynamic1 = dynamic1
        self.metric = metric
        self.label = label

    def get_moldict_coorddict(self, hier, molnames):
        """return data structure for the RMSD calculation

        @param hier hierarchy to scan
        @param molnames molecule names to include
        @return (moldict, mol_coords, mol_XYZs): molecules grouped by name,
                one coordinate and one XYZ decorator per residue
                (resolution 1, BALLS representation)
        """
        moldict = {}
        mol_coords = {}
        mol_XYZs = {}
        for mol in IMP.pmi.tools.get_molecules(hier):
            name = mol.get_name()
            if name not in molnames:
                continue
            parts = True
            mol_coords[mol] = []
            mol_XYZs[mol] = []
            i = 1
            # walk residue indexes until a selection comes back empty
            while parts:
                sel = IMP.atom.Selection(
                    mol, residue_index=i,
                    representation_type=IMP.atom.BALLS,
                    resolution=1)
                parts = sel.get_selected_particles()
                if parts:
                    mol_coords[mol].append(
                        IMP.core.XYZ(parts[0]).get_coordinates())
                    mol_XYZs[mol].append(IMP.core.XYZ(parts[0]))
                    i = i+1
            if name in moldict:
                moldict[name].append(mol)
            else:
                moldict[name] = [mol]
        return moldict, mol_coords, mol_XYZs

    def get_rmsd_and_assigments(self):
        # Returns (total_rmsd, best_assignments): the RMSD minimized over
        # all copy permutations per molecule name, and the chosen
        # (model molecule, reference molecule) pairing.
        total_rmsd = 0
        total_N = 0
        best_assignments = []
        rmsd_dict = {}
        for molname, ref_mols in self.moldict1.items():
            ref_coords = []
            for ref_mol in ref_mols:
                if self.dynamic1:
                    coord1 = [XYZ.get_coordinates()
                              for XYZ in self.mol_XYZs1[ref_mol]]
                else:
                    coord1 = self.molcoords1[ref_mol]
                ref_coords += coord1

            # try every permutation of the copies; keep the lowest RMSD
            rmsd = []
            rmf_mols_list = []
            for rmf_mols in itertools.permutations(self.moldict0[molname]):
                rmf_coords = []
                for rmf_mol in rmf_mols:
                    if self.dynamic0:
                        coord0 = [XYZ.get_coordinates()
                                  for XYZ in self.mol_XYZs0[rmf_mol]]
                    else:
                        coord0 = self.molcoords0[rmf_mol]
                    rmf_coords += coord0

                rmsd.append(IMP.algebra.get_rmsd(ref_coords, rmf_coords))
                rmf_mols_list.append(rmf_mols)
            m = min(rmsd)
            rmf_mols_best_order = rmf_mols_list[rmsd.index(m)]

            # accumulate the squared, length-weighted per-molecule RMSD of
            # the best assignment into the global total
            for n, (ref_mol, rmf_mol) in enumerate(
                    zip(ref_mols, rmf_mols_best_order)):
                best_assignments.append((rmf_mol, ref_mol))
                if self.dynamic0:
                    coord0 = [XYZ.get_coordinates()
                              for XYZ in self.mol_XYZs0[rmf_mol]]
                else:
                    coord0 = self.molcoords0[rmf_mol]

                if self.dynamic1:
                    coord1 = [XYZ.get_coordinates()
                              for XYZ in self.mol_XYZs1[ref_mol]]
                else:
                    coord1 = self.molcoords1[ref_mol]
                rmsd_pair = self.metric(coord1, coord0)
                N = len(self.molcoords1[ref_mol])
                total_N += N
                total_rmsd += rmsd_pair*rmsd_pair*N
                rmsd_dict[ref_mol] = rmsd_pair
        total_rmsd = sqrt(total_rmsd/total_N)
        return total_rmsd, best_assignments

    def get_output(self):
        """Returns output for IMP.pmi.output.Output object"""
        total_rmsd, best_assignments = self.get_rmsd_and_assigments()

        # render the assignment as "model_name.copy->ref_name.copy" strings
        assignments_out = []
        for rmf_mol, ref_mol in best_assignments:
            ref_name = ref_mol.get_name()
            ref_copy = IMP.atom.Copy(ref_mol).get_copy_index()
            rmf_name = rmf_mol.get_name()
            rmf_copy = IMP.atom.Copy(rmf_mol).get_copy_index()
            assignments_out.append(rmf_name + "." + str(rmf_copy) + "->" +
                                   ref_name + "." + str(ref_copy))
        return {"RMSD_" + self.label: str(total_rmsd),
                "RMSD_assignments_" + self.label: str(assignments_out)}
592 
593 
594 class Precision:
595  """A class to evaluate the precision of an ensemble.
596 
597  Also can evaluate the cross-precision of multiple ensembles.
598  Supports MPI for coordinate reading.
599  Recommended procedure:
600  -# initialize object and pass the selection for evaluating precision
601  -# call add_structures() to read in the data (specify group name)
602  -# call get_precision() to evaluate inter/intra precision
603  -# call get_rmsf() to evaluate within-group fluctuations
604  """
605  def __init__(self, model, resolution=1, selection_dictionary={}):
606  """Constructor.
607  @param model The IMP Model
608  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
609  part of the hier name)
610  @param selection_dictionary Dictionary where keys are names for
611  selections and values are selection tuples for scoring
612  precision. "All" is automatically made as well
613  """
614  try:
615  from mpi4py import MPI
616  self.comm = MPI.COMM_WORLD
617  self.rank = self.comm.Get_rank()
618  self.number_of_processes = self.comm.size
619  except ImportError:
620  self.number_of_processes = 1
621  self.rank = 0
622 
623  self.styles = ['pairwise_rmsd', 'pairwise_drmsd_k', 'pairwise_drmsd_Q',
624  'pairwise_drms_k', 'pairwise_rmsd', 'drmsd_from_center']
625  self.style = 'pairwise_drmsd_k'
626  self.structures_dictionary = {}
627  self.reference_structures_dictionary = {}
628  self.prots = []
629  self.protein_names = None
630  self.len_particles_resolution_one = None
631  self.model = model
632  self.rmf_names_frames = {}
633  self.reference_rmf_names_frames = None
634  self.reference_structure = None
635  self.reference_prot = None
636  self.selection_dictionary = selection_dictionary
637  self.threshold = 40.0
638  self.residue_particle_index_map = None
639  self.prots = None
640  if resolution in [1, 10]:
641  self.resolution = resolution
642  else:
643  raise KeyError("Currently only allow resolution 1 or 10")
644 
    def _get_structure(self, rmf_frame_index, rmf_name):
        """Read an RMF file and return the particles.

        The first call creates the hierarchies; later calls re-link the
        existing hierarchies so the same particles are updated in place.

        @raise ValueError if the new frame's particle count differs from
               previously loaded frames
        """
        rh = RMF.open_rmf_file_read_only(rmf_name)
        if not self.prots:
            print("getting coordinates for frame %i rmf file %s"
                  % (rmf_frame_index, rmf_name))
            self.prots = IMP.rmf.create_hierarchies(rh, self.model)
            IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
        else:
            print("linking coordinates for frame %i rmf file %s"
                  % (rmf_frame_index, rmf_name))
            IMP.rmf.link_hierarchies(rh, self.prots)
            IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
        del rh  # release the RMF file handle

        # NOTE(review): get_particles_at_resolution_one/ten appear to be
        # module-level helpers defined elsewhere in this file — verify
        if self.resolution == 1:
            particle_dict = get_particles_at_resolution_one(self.prots[0])
        elif self.resolution == 10:
            particle_dict = get_particles_at_resolution_ten(self.prots[0])

        protein_names = list(particle_dict.keys())
        particles_resolution_one = []
        for k in particle_dict:
            particles_resolution_one += (particle_dict[k])

        if self.protein_names is None:
            self.protein_names = protein_names
        else:
            # name mismatch is only reported, not raised (unlike the
            # particle-count check below)
            if self.protein_names != protein_names:
                print("Error: the protein names of the new coordinate set "
                      "is not compatible with the previous one")

        if self.len_particles_resolution_one is None:
            self.len_particles_resolution_one = len(particles_resolution_one)
        else:
            if self.len_particles_resolution_one != \
                    len(particles_resolution_one):
                raise ValueError("the new coordinate set is not compatible "
                                 "with the previous one")

        return particles_resolution_one
686 
    def add_structure(self,
                      rmf_name,
                      rmf_frame_index,
                      structure_set_name,
                      setup_index_map=False):
        """ Read a structure into the ensemble and store (as coordinates).
        @param rmf_name The name of the RMF file
        @param rmf_frame_index The frame to read
        @param structure_set_name Name for the set that includes this structure
               (e.g. "cluster 1")
        @param setup_index_map if requested, set up a dictionary to help
               find residue indexes
        @return 0 when the RMF could not be read (best-effort skip);
                None otherwise
        """

        # decide where to put this structure
        if structure_set_name in self.structures_dictionary:
            cdict = self.structures_dictionary[structure_set_name]
            rmflist = self.rmf_names_frames[structure_set_name]
        else:
            self.structures_dictionary[structure_set_name] = {}
            self.rmf_names_frames[structure_set_name] = []
            cdict = self.structures_dictionary[structure_set_name]
            rmflist = self.rmf_names_frames[structure_set_name]

        # read the particles
        try:
            particles_resolution_one = self._get_structure(
                rmf_frame_index, rmf_name)
        except ValueError:
            # deliberate best-effort: skip unreadable/incompatible frames
            print("something wrong with the rmf")
            return 0
        self.selection_dictionary.update({"All": self.protein_names})

        # store one coordinate list per named selection
        for selection_name in self.selection_dictionary:
            selection_tuple = self.selection_dictionary[selection_name]
            coords = self._select_coordinates(
                selection_tuple, particles_resolution_one, self.prots[0])
            if selection_name not in cdict:
                cdict[selection_name] = [coords]
            else:
                cdict[selection_name].append(coords)

        rmflist.append((rmf_name, rmf_frame_index))

        # if requested, set up a dictionary to help find residue indexes
        if setup_index_map:
            self.residue_particle_index_map = {}
            for prot_name in self.protein_names:
                self.residue_particle_index_map[prot_name] = \
                    self._get_residue_particle_index_map(
                        prot_name, particles_resolution_one, self.prots[0])
738 
    def add_structures(self,
                       rmf_name_frame_tuples,
                       structure_set_name):
        """Read a list of RMFs, supports parallel
        @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
        @param structure_set_name Name this set of structures
               (e.g. "cluster.1")
        """

        # split up the requested list to read in parallel
        my_rmf_name_frame_tuples = IMP.pmi.tools.chunk_list_into_segments(
            rmf_name_frame_tuples, self.number_of_processes)[self.rank]
        for nfr, tup in enumerate(my_rmf_name_frame_tuples):
            rmf_name = tup[0]
            rmf_frame_index = tup[1]
            # the first frame stores the map between residues and particles
            if self.residue_particle_index_map is None:
                setup_index_map = True
            else:
                setup_index_map = False
            self.add_structure(rmf_name,
                               rmf_frame_index,
                               structure_set_name,
                               setup_index_map)

        # synchronize the structures
        if self.number_of_processes > 1:
            self.rmf_names_frames = IMP.pmi.tools.scatter_and_gather(
                self.rmf_names_frames)
            # rank 0 merges everyone's structures, then broadcasts the
            # merged dictionary back to all other ranks
            if self.rank != 0:
                self.comm.send(self.structures_dictionary, dest=0, tag=11)
            elif self.rank == 0:
                for i in range(1, self.number_of_processes):
                    data_tmp = self.comm.recv(source=i, tag=11)
                    for key in self.structures_dictionary:
                        self.structures_dictionary[key].update(data_tmp[key])
                for i in range(1, self.number_of_processes):
                    self.comm.send(self.structures_dictionary, dest=i, tag=11)
            if self.rank != 0:
                self.structures_dictionary = self.comm.recv(source=0, tag=11)
779 
780  def _get_residue_particle_index_map(self, prot_name, structure, hier):
781  # Creates map from all particles to residue numbers
782  residue_particle_index_map = []
783  s = IMP.atom.Selection(hier, molecules=[prot_name], resolution=1)
784  all_selected_particles = s.get_selected_particles()
785  intersection = list(set(all_selected_particles) & set(structure))
786  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
787  for p in sorted_intersection:
788  residue_particle_index_map.append(
790  return residue_particle_index_map
791 
792  def _select_coordinates(self, tuple_selections, structure, prot):
793  selected_coordinates = []
794  for t in tuple_selections:
795  if isinstance(t, tuple) and len(t) == 3:
796  s = IMP.atom.Selection(
797  prot, molecules=[t[2]],
798  residue_indexes=range(t[0], t[1]+1), resolution=1)
799  all_selected_particles = s.get_selected_particles()
800  intersection = list(set(all_selected_particles)
801  & set(structure))
802  sorted_intersection = \
803  IMP.pmi.tools.sort_by_residues(intersection)
804  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
805  for p in sorted_intersection]
806  selected_coordinates += cc
807  elif isinstance(t, str):
808  s = IMP.atom.Selection(prot, molecules=[t], resolution=1)
809  all_selected_particles = s.get_selected_particles()
810  intersection = list(set(all_selected_particles)
811  & set(structure))
812  sorted_intersection = \
813  IMP.pmi.tools.sort_by_residues(intersection)
814  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
815  for p in sorted_intersection]
816  selected_coordinates += cc
817  else:
818  raise ValueError("Selection error")
819  return selected_coordinates
820 
    def set_threshold(self, threshold):
        """Set the distance threshold used by the pairwise_drmsd_Q metric."""
        self.threshold = threshold
823 
824  def _get_distance(self, structure_set_name1, structure_set_name2,
825  selection_name, index1, index2):
826  """Compute distance between structures with various metrics"""
827  c1 = self.structures_dictionary[
828  structure_set_name1][selection_name][index1]
829  c2 = self.structures_dictionary[
830  structure_set_name2][selection_name][index2]
831  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
832  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
833 
834  if self.style == 'pairwise_drmsd_k':
835  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
836  if self.style == 'pairwise_drms_k':
837  distance = IMP.atom.get_drms(coordinates1, coordinates2)
838  if self.style == 'pairwise_drmsd_Q':
839  distance = IMP.atom.get_drmsd_Q(coordinates1, coordinates2,
840  self.threshold)
841 
842  if self.style == 'pairwise_rmsd':
843  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
844  return distance
845 
846  def _get_particle_distances(self, structure_set_name1, structure_set_name2,
847  selection_name, index1, index2):
848  c1 = self.structures_dictionary[
849  structure_set_name1][selection_name][index1]
850  c2 = self.structures_dictionary[
851  structure_set_name2][selection_name][index2]
852 
853  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
854  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
855 
856  distances = [np.linalg.norm(a-b)
857  for (a, b) in zip(coordinates1, coordinates2)]
858 
859  return distances
860 
861  def get_precision(self, structure_set_name1, structure_set_name2,
862  outfile=None, skip=1, selection_keywords=None):
863  """ Evaluate the precision of two named structure groups. Supports MPI.
864  When the structure_set_name1 is different from the structure_set_name2,
865  this evaluates the cross-precision (average pairwise distances).
866  @param outfile Name of the precision output file
867  @param structure_set_name1 string name of the first structure set
868  @param structure_set_name2 string name of the second structure set
869  @param skip analyze every (skip) structure for the distance
870  matrix calculation
871  @param selection_keywords Specify the selection name you want to
872  calculate on. By default this is computed for everything
873  you provided in the constructor, plus all the subunits together.
874  """
875  if selection_keywords is None:
876  sel_keys = list(self.selection_dictionary.keys())
877  else:
878  for k in selection_keywords:
879  if k not in self.selection_dictionary:
880  raise KeyError(
881  "you are trying to find named selection "
882  + k + " which was not requested in the constructor")
883  sel_keys = selection_keywords
884 
885  if outfile is not None:
886  of = open(outfile, "w")
887  centroid_index = 0
888  for selection_name in sel_keys:
889  number_of_structures_1 = len(self.structures_dictionary[
890  structure_set_name1][selection_name])
891  number_of_structures_2 = len(self.structures_dictionary[
892  structure_set_name2][selection_name])
893  distances = {}
894  structure_pointers_1 = list(range(0, number_of_structures_1, skip))
895  structure_pointers_2 = list(range(0, number_of_structures_2, skip))
896  pair_combination_list = list(
897  itertools.product(structure_pointers_1, structure_pointers_2))
898  if len(pair_combination_list) == 0:
899  raise ValueError("no structure selected. Check the "
900  "skip parameter.")
901 
902  # compute pairwise distances in parallel
903  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
904  pair_combination_list, self.number_of_processes)[self.rank]
905  for n, pair in enumerate(my_pair_combination_list):
906  distances[pair] = self._get_distance(
907  structure_set_name1, structure_set_name2, selection_name,
908  pair[0], pair[1])
909  if self.number_of_processes > 1:
910  distances = IMP.pmi.tools.scatter_and_gather(distances)
911 
912  # Finally compute distance to centroid
913  if self.rank == 0:
914  if structure_set_name1 == structure_set_name2:
915  structure_pointers = structure_pointers_1
916  number_of_structures = number_of_structures_1
917 
918  # calculate the distance from the first centroid
919  # and determine the centroid
920  distance = 0.0
921  distances_to_structure = {}
922  distances_to_structure_normalization = {}
923 
924  for n in structure_pointers:
925  distances_to_structure[n] = 0.0
926  distances_to_structure_normalization[n] = 0
927 
928  for k in distances:
929  distance += distances[k]
930  distances_to_structure[k[0]] += distances[k]
931  distances_to_structure[k[1]] += distances[k]
932  distances_to_structure_normalization[k[0]] += 1
933  distances_to_structure_normalization[k[1]] += 1
934 
935  for n in structure_pointers:
936  distances_to_structure[n] = \
937  distances_to_structure[n] \
938  / distances_to_structure_normalization[n]
939 
940  min_distance = min([distances_to_structure[n]
941  for n in distances_to_structure])
942  centroid_index = [k for k, v in
943  distances_to_structure.items()
944  if v == min_distance][0]
945  centroid_rmf_name = \
946  self.rmf_names_frames[
947  structure_set_name1][centroid_index]
948 
949  centroid_distance = 0.0
950  distance_list = []
951  for n in range(number_of_structures):
952  dist = self._get_distance(
953  structure_set_name1, structure_set_name1,
954  selection_name, centroid_index, n)
955  centroid_distance += dist
956  distance_list.append(dist)
957 
958  centroid_distance /= number_of_structures
959  if outfile is not None:
960  of.write(str(selection_name) + " " +
961  structure_set_name1 +
962  " average centroid distance " +
963  str(centroid_distance) + "\n")
964  of.write(str(selection_name) + " " +
965  structure_set_name1 + " centroid index " +
966  str(centroid_index) + "\n")
967  of.write(str(selection_name) + " " +
968  structure_set_name1 + " centroid rmf name " +
969  str(centroid_rmf_name) + "\n")
970  of.write(str(selection_name) + " " +
971  structure_set_name1 +
972  " median centroid distance " +
973  str(np.median(distance_list)) + "\n")
974 
975  average_pairwise_distances = sum(
976  distances.values())/len(list(distances.values()))
977  if outfile is not None:
978  of.write(str(selection_name) + " " + structure_set_name1 +
979  " " + structure_set_name2 +
980  " average pairwise distance " +
981  str(average_pairwise_distances) + "\n")
982  if outfile is not None:
983  of.close()
984  return centroid_index
985 
    def get_rmsf(self, structure_set_name, outdir="./", skip=1,
                 set_plot_yaxis_range=None):
        """ Calculate the residue mean square fluctuations (RMSF).
        Automatically outputs as data file and pdf
        @param structure_set_name Which structure set to calculate RMSF for
        @param outdir Where to write the files
        @param skip Skip this number of structures
        @param set_plot_yaxis_range In case you need to change the plot

        Writes one "rmsf.<protein>.dat" file and one plot per protein.
        Only rank 0 does the per-protein work; get_precision() is
        collective (MPI) so every rank must call this method.
        """
        # get the centroid structure for the whole complex
        centroid_index = self.get_precision(structure_set_name,
                                            structure_set_name,
                                            outfile=None,
                                            skip=skip)
        if self.rank == 0:
            for sel_name in self.protein_names:
                # register a per-protein selection so _get_particle_distances
                # can be evaluated for this protein alone
                self.selection_dictionary.update({sel_name: [sel_name]})
                try:
                    number_of_structures = \
                        len(self.structures_dictionary[
                            structure_set_name][sel_name])
                except KeyError:
                    # that protein was not included in the selection
                    continue
                # rpim maps residue blocks to particle indexes for sel_name
                rpim = self.residue_particle_index_map[sel_name]
                outfile = outdir+"/rmsf."+sel_name+".dat"
                of = open(outfile, "w")
                residue_distances = {}
                residue_nblock = {}
                for index in range(number_of_structures):
                    # per-particle distances between the centroid structure
                    # and structure `index`
                    distances = self._get_particle_distances(
                        structure_set_name, structure_set_name, sel_name,
                        centroid_index, index)
                    for nblock, block in enumerate(rpim):
                        # every residue in a block shares its particle's
                        # distance value
                        for residue_number in block:
                            residue_nblock[residue_number] = nblock
                            if residue_number not in residue_distances:
                                residue_distances[residue_number] = \
                                    [distances[nblock]]
                            else:
                                residue_distances[
                                    residue_number].append(distances[nblock])

                residues = []
                rmsfs = []
                for rn in residue_distances:
                    residues.append(rn)
                    # RMSF reported as the std dev of the per-structure
                    # distances to the centroid
                    rmsf = np.std(residue_distances[rn])
                    rmsfs.append(rmsf)
                    of.write(str(rn) + " " + str(residue_nblock[rn])
                             + " " + str(rmsf) + "\n")

                IMP.pmi.output.plot_xy_data(
                    residues, rmsfs, title=sel_name,
                    out_fn=outdir+"/rmsf."+sel_name, display=False,
                    set_plot_yaxis_range=set_plot_yaxis_range,
                    xlabel='Residue Number', ylabel='Standard error')
                of.close()
1044 
1045  def set_reference_structure(self, rmf_name, rmf_frame_index):
1046  """Read in a structure used for reference computation.
1047  Needed before calling get_average_distance_wrt_reference_structure()
1048  @param rmf_name The RMF file to read the reference
1049  @param rmf_frame_index The index in that file
1050  """
1051  particles_resolution_one = self._get_structure(rmf_frame_index,
1052  rmf_name)
1053  self.reference_rmf_names_frames = (rmf_name, rmf_frame_index)
1054 
1055  for selection_name in self.selection_dictionary:
1056  selection_tuple = self.selection_dictionary[selection_name]
1057  coords = self._select_coordinates(
1058  selection_tuple, particles_resolution_one, self.prots[0])
1059  self.reference_structures_dictionary[selection_name] = coords
1060 
1062  self, structure_set_name, alignment_selection_key):
1063  """First align then calculate RMSD
1064  @param structure_set_name: the name of the structure set
1065  @param alignment_selection_key: the key containing the selection tuples
1066  needed to make the alignment stored in self.selection_dictionary
1067  @return: for each structure in the structure set, returns the rmsd
1068  """
1069  ret = {}
1070  if self.reference_structures_dictionary == {}:
1071  print("Cannot compute until you set a reference structure")
1072  return
1073 
1074  align_reference_coordinates = \
1075  self.reference_structures_dictionary[alignment_selection_key]
1076  align_coordinates = self.structures_dictionary[
1077  structure_set_name][alignment_selection_key]
1078  transformations = []
1079  for c in align_coordinates:
1081  {"All": align_reference_coordinates}, {"All": c})
1082  transformation = Ali.align()[1]
1083  transformations.append(transformation)
1084  for selection_name in self.selection_dictionary:
1085  reference_coordinates = \
1086  self.reference_structures_dictionary[selection_name]
1087  coordinates2 = [IMP.algebra.Vector3D(c)
1088  for c in reference_coordinates]
1089  distances = []
1090  for n, sc in enumerate(self.structures_dictionary[
1091  structure_set_name][selection_name]):
1092  coordinates1 = [
1093  transformations[n].get_transformed(IMP.algebra.Vector3D(c))
1094  for c in sc]
1095  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1096  distances.append(distance)
1097  ret[selection_name] = {
1098  'all_distances': distances,
1099  'average_distance': sum(distances)/len(distances),
1100  'minimum_distance': min(distances)}
1101  return ret
1102 
1103  def _get_median(self, list_of_values):
1104  return np.median(np.array(list_of_values))
1105 
1106  def get_average_distance_wrt_reference_structure(self, structure_set_name):
1107  """Compare the structure set to the reference structure.
1108  @param structure_set_name The structure set to compute this on
1109  @note First call set_reference_structure()
1110  """
1111  ret = {}
1112  if self.reference_structures_dictionary == {}:
1113  print("Cannot compute until you set a reference structure")
1114  return
1115  for selection_name in self.selection_dictionary:
1116  reference_coordinates = self.reference_structures_dictionary[
1117  selection_name]
1118  coordinates2 = [IMP.algebra.Vector3D(c)
1119  for c in reference_coordinates]
1120  distances = []
1121  for sc in self.structures_dictionary[
1122  structure_set_name][selection_name]:
1123  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
1124  if self.style == 'pairwise_drmsd_k':
1125  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
1126  if self.style == 'pairwise_drms_k':
1127  distance = IMP.atom.get_drms(coordinates1, coordinates2)
1128  if self.style == 'pairwise_drmsd_Q':
1129  distance = IMP.atom.get_drmsd_Q(
1130  coordinates1, coordinates2, self.threshold)
1131  if self.style == 'pairwise_rmsd':
1132  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1133  distances.append(distance)
1134 
1135  print(selection_name, "average distance",
1136  sum(distances)/len(distances), "minimum distance",
1137  min(distances), 'nframes', len(distances))
1138  ret[selection_name] = {
1139  'average_distance': sum(distances)/len(distances),
1140  'minimum_distance': min(distances)}
1141  return ret
1142 
1143  def get_coordinates(self):
1144  pass
1145 
1146  def set_precision_style(self, style):
1147  if style in self.styles:
1148  self.style = style
1149  else:
1150  raise ValueError("No such style")
1151 
1152 
1154  """Compute mean density maps from structures.
1155 
1156  Keeps a dictionary of density maps,
1157  keys are in the custom ranges. When you call add_subunits_density, it adds
1158  particle coordinates to the existing density maps.
1159  """
1160 
1161  def __init__(self, custom_ranges, representation=None, resolution=20.0,
1162  voxel=5.0):
1163  """Constructor.
1164  @param custom_ranges Required. It's a dictionary, keys are the
1165  density component names, values are selection tuples
1166  e.g. {'kin28':[['kin28',1,-1]],
1167  'density_name_1' :[('ccl1')],
1168  'density_name_2' :[(1,142,'tfb3d1'),
1169  (143,700,'tfb3d2')],
1170  @param resolution The MRC resolution of the output map (in
1171  Angstrom unit)
1172  @param voxel The voxel size for the output map (lower is slower)
1173  """
1174 
1175  if representation:
1177  "The 'representation' argument is deprecated "
1178  "(and is ignored). Pass hierarchies to "
1179  "add_subunits_density() instead.")
1180  self.MRCresolution = resolution
1181  self.voxel = voxel
1182  self.densities = {}
1183  self.count_models = 0.0
1184  self.custom_ranges = custom_ranges
1185 
1186  def add_subunits_density(self, hierarchy):
1187  """Add a frame to the densities.
1188  @param hierarchy The hierarchy to add.
1189  """
1190  self.count_models += 1.0
1191 
1192  part_dict = get_particles_at_resolution_one(hierarchy)
1193  all_particles_by_resolution = []
1194  for name in part_dict:
1195  all_particles_by_resolution += part_dict[name]
1196 
1197  for density_name in self.custom_ranges:
1198  parts = []
1199  all_particles_by_segments = []
1200 
1201  for seg in self.custom_ranges[density_name]:
1202  if isinstance(seg, str):
1203  s = IMP.atom.Selection(hierarchy, molecule=seg)
1204  elif isinstance(seg, tuple) and len(seg) == 2:
1205  s = IMP.atom.Selection(
1206  hierarchy, molecule=seg[0], copy_index=seg[1])
1207  elif isinstance(seg, tuple) and len(seg) == 3:
1208  s = IMP.atom.Selection(
1209  hierarchy, molecule=seg[2],
1210  residue_indexes=range(seg[0], seg[1] + 1))
1211  elif isinstance(seg, tuple) and len(seg) == 4:
1212  s = IMP.atom.Selection(
1213  hierarchy, molecule=seg[2],
1214  residue_indexes=range(seg[0], seg[1] + 1),
1215  copy_index=seg[3])
1216  else:
1217  raise Exception('could not understand selection tuple '
1218  + str(seg))
1219 
1220  all_particles_by_segments += s.get_selected_particles()
1221  parts = all_particles_by_segments
1222  self._create_density_from_particles(parts, density_name)
1223 
1224  def normalize_density(self):
1225  pass
1226 
    def _create_density_from_particles(self, ps, name,
                                       kernel_type='GAUSSIAN'):
        '''Internal function for adding to densities.
        pass XYZR particles with mass and create a density from them.
        kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.

        @note kernel_type is currently not referenced anywhere in this
              body; the SampledDensityMap default kernel is always used.
        '''
        dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
        dmap.calcRMS()
        dmap.set_was_used(True)
        if name not in self.densities:
            # first frame for this component: store the new map directly
            self.densities[name] = dmap
        else:
            # accumulate: build a map spanning the union of both bounding
            # boxes and add the old and the new map into it
            bbox1 = IMP.em.get_bounding_box(self.densities[name])
            bbox2 = IMP.em.get_bounding_box(dmap)
            bbox1 += bbox2
            dmap3 = IMP.em.create_density_map(bbox1, self.voxel)
            dmap3.set_was_used(True)
            dmap3.add(dmap)
            dmap3.add(self.densities[name])
            self.densities[name] = dmap3
1246 
1247  def get_density_keys(self):
1248  return list(self.densities.keys())
1249 
1250  def get_density(self, name):
1251  """Get the current density for some component name"""
1252  if name not in self.densities:
1253  return None
1254  else:
1255  return self.densities[name]
1256 
1257  def write_mrc(self, path="./", suffix=None):
1258  import os
1259  for density_name in self.densities:
1260  self.densities[density_name].multiply(1. / self.count_models)
1261  if suffix is None:
1262  name = path + "/" + density_name + ".mrc"
1263  else:
1264  name = path + "/" + density_name + "." + suffix + ".mrc"
1265  path, file = os.path.split(name)
1266  os.makedirs(path, exist_ok=True)
1267  IMP.em.write_map(self.densities[density_name], name,
1269 
1270 
1271 class GetContactMap:
1272 
1273  def __init__(self, distance=15.):
1274  self.distance = distance
1275  self.contactmap = ''
1276  self.namelist = []
1277  self.xlinks = 0
1278  self.XL = {}
1279  self.expanded = {}
1280  self.resmap = {}
1281 
1282  def set_prot(self, prot):
1283  self.prot = prot
1284  self.protnames = []
1285 
1286  particles_dictionary = get_particles_at_resolution_one(self.prot)
1287 
1288  for name in particles_dictionary:
1289  residue_indexes = []
1290  for p in particles_dictionary[name]:
1291  print(p.get_name())
1292  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1293 
1294  if len(residue_indexes) != 0:
1295  self.protnames.append(name)
1296 
    def get_subunit_coords(self, frame, align=0):
        """Accumulate one frame into the contact map.

        Collects leaf coordinates/radii per subunit, builds bead names and
        the residue->bead resmap, then adds a boolean contact matrix
        (surface distance <= self.distance) into self.contactmap.

        @note the `frame` and `align` parameters are never referenced in
              this body.
        """
        from scipy.spatial.distance import cdist
        coords = []
        radii = []
        namelist = []
        for part in self.prot.get_children():
            SortedSegments = []
            print(part)
            for chl in part.get_children():
                # order segments by the first residue of their first leaf
                start = IMP.atom.get_leaves(chl)[0]

                startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
                SortedSegments.append((chl, startres))
            SortedSegments = sorted(SortedSegments, key=itemgetter(1))

            for sgmnt in SortedSegments:
                for leaf in IMP.atom.get_leaves(sgmnt[0]):
                    p = IMP.core.XYZR(leaf)
                    crd = np.array([p.get_x(), p.get_y(), p.get_z()])

                    coords.append(crd)
                    radii.append(p.get_radius())

                    # bead name: <subunit>_<segment>_<first residue index>
                    new_name = part.get_name() + '_' + sgmnt[0].get_name() + \
                        '_' + \
                        str(IMP.atom.Fragment(leaf)
                            .get_residue_indexes()[0])
                    namelist.append(new_name)
                    # number of residues this bead expands to
                    self.expanded[new_name] = len(
                        IMP.atom.Fragment(leaf).get_residue_indexes())
                    if part.get_name() not in self.resmap:
                        self.resmap[part.get_name()] = {}
                    for res in IMP.atom.Fragment(leaf).get_residue_indexes():
                        self.resmap[part.get_name()][res] = new_name

        coords = np.array(coords)
        radii = np.array(radii)
        # initialize the map on the first call only
        if len(self.namelist) == 0:
            self.namelist = namelist
            self.contactmap = np.zeros((len(coords), len(coords)))
        # surface-to-surface distances: subtract both radii
        distances = cdist(coords, coords)
        distances = (distances - radii).T - radii
        distances = distances <= self.distance
        self.contactmap += distances
1341 
    def add_xlinks(self, filename,
                   identification_string='ISDCrossLinkMS_Distance_'):
        """Read cross-links from a text file into self.XL.

        @param filename file to parse, one record per line
        @param identification_string only lines containing this substring
               are treated as cross-link records
        """
        self.xlinks = 1
        with open(filename) as data:
            D = data.readlines()

        for d in D:
            if identification_string in d:
                # normalize '_', '-' and ':' separators to whitespace,
                # then tokenize
                d = d.replace("_", " ").replace("-", " ").replace(
                    ":", " ").split()

                # store the residue pair under both protein-name orders
                t1, t2 = (d[0], d[1]), (d[1], d[0])
                if t1 not in self.XL:
                    # NOTE(review): for intra-protein links t1 == t2, so the
                    # second assignment overwrites the first here — confirm
                    # this is intended
                    self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
                    self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
                else:
                    self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
                    self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1360 
    def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
        """Assemble per-protein-pair matrices and plot the contact map.

        @param skip_cmap 0 to build contact matrices from the accumulated
               contact map, nonzero to skip them
        @param skip_xl 0 to overlay cross-links (requires add_xlinks),
               nonzero to skip them
        @param outname if given, save the figure as <outname>.pdf without
               requiring a display; otherwise show it interactively
        """
        K = self.namelist
        M = self.contactmap
        C, R = [], []
        proteins = self.protnames

        # exp new
        if skip_cmap == 0:
            Matrices = {}
            proteins = [p.get_name() for p in self.prot.get_children()]
            missing = []
            for p1 in range(len(proteins)):
                for p2 in range(p1, len(proteins)):
                    # pl1/pl2: highest residue index of each protein
                    pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
                                max(self.resmap[proteins[p2]].keys()))
                    pn1, pn2 = proteins[p1], proteins[p2]
                    mtr = np.zeros((pl1 + 1, pl2 + 1))
                    print('Creating matrix for: ',
                          p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
                    for i1 in range(1, pl1 + 1):
                        for i2 in range(1, pl2 + 1):
                            try:
                                # map residues to bead names, then to
                                # contact-map rows/columns
                                r1 = K.index(self.resmap[pn1][i1])
                                r2 = K.index(self.resmap[pn2][i2])
                                r = M[r1, r2]
                                mtr[i1 - 1, i2 - 1] = r
                            except KeyError:
                                # residue absent from resmap
                                # NOTE(review): K.index raises ValueError,
                                # which is NOT caught here — confirm beads
                                # are always present in namelist
                                missing.append((pn1, pn2, i1, i2))
                    Matrices[(pn1, pn2)] = mtr

        # add cross-links
        if skip_xl == 0:
            if self.XL == {}:
                raise ValueError("cross-links were not provided, use "
                                 "add_xlinks function!")
            Matrices_xl = {}
            for p1 in range(len(proteins)):
                for p2 in range(p1, len(proteins)):
                    pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
                                max(self.resmap[proteins[p2]].keys()))
                    pn1, pn2 = proteins[p1], proteins[p2]
                    mtr = np.zeros((pl1 + 1, pl2 + 1))
                    # flg: 0 = links stored as (pn1, pn2); 1 = stored
                    # reversed; 2 = no links for this pair
                    flg = 0
                    try:
                        xls = self.XL[(pn1, pn2)]
                    except KeyError:
                        try:
                            xls = self.XL[(pn2, pn1)]
                            flg = 1
                        except KeyError:
                            flg = 2
                    if flg == 0:
                        print('Creating matrix for: ',
                              p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
                        for xl1, xl2 in xls:
                            # clamp out-of-range residues to the matrix edge
                            if xl1 > pl1:
                                print('X' * 10, xl1, xl2)
                                xl1 = pl1
                            if xl2 > pl2:
                                print('X' * 10, xl1, xl2)
                                xl2 = pl2
                            mtr[xl1 - 1, xl2 - 1] = 100
                    elif flg == 1:
                        print('Creating matrix for: ',
                              p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
                        for xl1, xl2 in xls:
                            if xl1 > pl1:
                                print('X' * 10, xl1, xl2)
                                xl1 = pl1
                            if xl2 > pl2:
                                print('X' * 10, xl1, xl2)
                                xl2 = pl2
                            # reversed storage: transpose the indices
                            mtr[xl2 - 1, xl1 - 1] = 100
                    else:
                        print('No cross-links between: ', pn1, pn2)
                    Matrices_xl[(pn1, pn2)] = mtr

        # make list of protein names and create coordinate lists
        C = proteins
        # W is the component length list,
        # R is the contiguous coordinates list
        W, R = [], []
        for i, c in enumerate(C):
            cl = max(self.resmap[c].keys())
            W.append(cl)
            if i == 0:
                R.append(cl)
            else:
                R.append(R[-1] + cl)

        # start plotting
        if outname:
            # Don't require a display
            import matplotlib as mpl
            mpl.use('Agg')
        import matplotlib.pyplot as plt
        import matplotlib.gridspec as gridspec
        import scipy.sparse as sparse

        plt.figure()
        # one subplot per protein pair, sized proportionally to lengths
        gs = gridspec.GridSpec(len(W), len(W),
                               width_ratios=W,
                               height_ratios=W)

        cnt = 0
        for x1, r1 in enumerate(R):
            for x2, r2 in enumerate(R):
                ax = plt.subplot(gs[cnt])
                if skip_cmap == 0:
                    try:
                        mtr = Matrices[(C[x1], C[x2])]
                    except KeyError:
                        # only the upper triangle is stored; transpose
                        mtr = Matrices[(C[x2], C[x1])].T
                    ax.imshow(
                        np.log(mtr),
                        interpolation='nearest',
                        vmin=0.,
                        vmax=log(mtr.max()) if mtr.max() > 0. else 1.)
                    ax.set_xticks([])
                    ax.set_yticks([])
                if skip_xl == 0:
                    try:
                        mtr = Matrices_xl[(C[x1], C[x2])]
                    except KeyError:
                        mtr = Matrices_xl[(C[x2], C[x1])].T
                    ax.spy(
                        sparse.csr_matrix(mtr),
                        markersize=10,
                        color='white',
                        linewidth=100,
                        alpha=0.5)
                    ax.set_xticks([])
                    ax.set_yticks([])

                cnt += 1
                if x2 == 0:
                    ax.set_ylabel(C[x1], rotation=90)
        if outname:
            plt.savefig(outname + ".pdf", dpi=300, transparent="False")
        else:
            plt.show()
1502 
1503 
1504 # ------------------------------------------------------------------
1505 # a few random tools
1506 
def get_hiers_from_rmf(model, frame_number, rmf_file):
    """Create hierarchies from one frame of an RMF file.

    Returns the hierarchies, or None if the file or frame cannot be read.
    (Marked for deprecation by the original author.)
    """
    print("getting coordinates for frame %i rmf file %s"
          % (frame_number, rmf_file))

    # open the file and build the hierarchies
    handle = RMF.open_rmf_file_read_only(rmf_file)

    try:
        hierarchies = IMP.rmf.create_hierarchies(handle, model)
    except IOError:
        print("Unable to open rmf file %s" % (rmf_file))
        return None

    try:
        IMP.rmf.load_frame(handle, RMF.FrameID(frame_number))
    except IOError:
        print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
        return None
    model.update()
    del handle

    return hierarchies
1530 
1531 
def link_hiers_to_rmf(model, hiers, frame_number, rmf_file):
    """Link existing hierarchies to a frame of an RMF file.

    @param model the Model to update after loading the frame
    @param hiers hierarchies previously created from a compatible RMF
    @param frame_number frame index to load
    @param rmf_file path of the RMF file
    @return True on success, False if the frame could not be loaded
    """
    print("linking hierarchies for frame %i rmf file %s"
          % (frame_number, rmf_file))
    rh = RMF.open_rmf_file_read_only(rmf_file)
    IMP.rmf.link_hierarchies(rh, hiers)
    try:
        IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
    except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed
        print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
        return False
    model.update()
    del rh
    return True
1545 
1546 
def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
    """Create hierarchies and restraints from one frame of an RMF file.

    @return (hierarchies, restraints), or (None, None) on failure.
    (Marked for deprecation by the original author.)
    """
    print("getting coordinates for frame %i rmf file %s"
          % (frame_number, rmf_file))

    # load the frame
    rh = RMF.open_rmf_file_read_only(rmf_file)

    try:
        prots = IMP.rmf.create_hierarchies(rh, model)
        rs = IMP.rmf.create_restraints(rh, model)
    except IOError:
        print("Unable to open rmf file %s" % (rmf_file))
        return None, None
    try:
        IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
    except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed
        print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
        return None, None
    model.update()
    del rh

    return prots, rs
1570 
1571 
def link_hiers_and_restraints_to_rmf(model, hiers, rs, frame_number, rmf_file):
    """Link existing hierarchies and restraints to a frame of an RMF file.

    @param model the Model to update after loading the frame
    @param hiers hierarchies previously created from a compatible RMF
    @param rs restraints previously created from a compatible RMF
    @param frame_number frame index to load
    @param rmf_file path of the RMF file
    @return True on success, False if the frame could not be loaded
    """
    print("linking hierarchies for frame %i rmf file %s"
          % (frame_number, rmf_file))
    rh = RMF.open_rmf_file_read_only(rmf_file)
    IMP.rmf.link_hierarchies(rh, hiers)
    IMP.rmf.link_restraints(rh, rs)
    try:
        IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
    except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed
        print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
        return False
    model.update()
    del rh
    return True
1586 
1587 
1589  """Get particles at res 1, or any beads, based on the name.
1590  No Representation is needed. This is mainly used when the hierarchy
1591  is read from an RMF file.
1592 
1593  @return a dictionary of component names and their particles
1594  @note If the root node is named "System" or is a "State", do
1595  proper selection.
1596  """
1597  particle_dict = {}
1598 
1599  # attempt to give good results for PMI2
1600  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1601  sel = IMP.atom.Selection(mol, resolution=1)
1602  particle_dict[mol.get_name()] = sel.get_selected_particles()
1603  return particle_dict
1604 
1605 
1607  """Get particles at res 10, or any beads, based on the name.
1608  No Representation is needed.
1609  This is mainly used when the hierarchy is read from an RMF file.
1610 
1611  @return a dictionary of component names and their particles
1612  @note If the root node is named "System" or is a "State", do proper
1613  selection.
1614  """
1615  particle_dict = {}
1616  # attempt to give good results for PMI2
1617  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1618  sel = IMP.atom.Selection(mol, resolution=10)
1619  particle_dict[mol.get_name()] = sel.get_selected_particles()
1620  return particle_dict
1621 
1622 
1624  """Visualization of cross-links"""
1625  def __init__(self):
1626  self.crosslinks = []
1627  self.external_csv_data = None
1628  self.crosslinkedprots = set()
1629  self.mindist = +10000000.0
1630  self.maxdist = -10000000.0
1631  self.contactmap = None
1632 
1633  def set_hierarchy(self, prot):
1634  self.prot_length_dict = {}
1635  self.model = prot.get_model()
1636 
1637  for i in prot.get_children():
1638  name = i.get_name()
1639  residue_indexes = []
1640  for p in IMP.atom.get_leaves(i):
1641  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1642 
1643  if len(residue_indexes) != 0:
1644  self.prot_length_dict[name] = max(residue_indexes)
1645 
    def set_coordinates_for_contact_map(self, rmf_name, rmf_frame_index):
        """Load a frame and accumulate a per-residue contact map.

        @param rmf_name RMF file to read
        @param rmf_frame_index frame index in that file
        """
        from scipy.spatial.distance import cdist

        rh = RMF.open_rmf_file_read_only(rmf_name)
        prots = IMP.rmf.create_hierarchies(rh, self.model)
        IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
        print("getting coordinates for frame %i rmf file %s"
              % (rmf_frame_index, rmf_name))
        del rh

        coords = []
        radii = []

        particles_dictionary = get_particles_at_resolution_one(prots[0])

        # resindex: running row/column index of the contact map
        resindex = 0
        self.index_dictionary = {}

        for name in particles_dictionary:
            residue_indexes = []
            for p in particles_dictionary[name]:
                print(p.get_name())
                # NOTE(review): this rebinds (not extends) residue_indexes
                # per particle, so the initial [] above is never used
                residue_indexes = IMP.pmi.tools.get_residue_indexes(p)

                if len(residue_indexes) != 0:

                    # one contact-map entry per residue covered by this
                    # particle, all sharing the particle's coordinates
                    for res in range(min(residue_indexes),
                                     max(residue_indexes) + 1):
                        d = IMP.core.XYZR(p)

                        crd = np.array([d.get_x(), d.get_y(), d.get_z()])
                        coords.append(crd)
                        radii.append(d.get_radius())
                        if name not in self.index_dictionary:
                            self.index_dictionary[name] = [resindex]
                        else:
                            self.index_dictionary[name].append(resindex)
                        resindex += 1

        coords = np.array(coords)
        radii = np.array(radii)

        # surface-to-surface distances: subtract both radii
        distances = cdist(coords, coords)
        distances = (distances - radii).T - radii

        # hard-coded 20.0 A contact cutoff
        distances = np.where(distances <= 20.0, 1.0, 0)
        if self.contactmap is None:
            self.contactmap = np.zeros((len(coords), len(coords)))
        self.contactmap += distances

        # free the temporary hierarchies created for this frame
        for prot in prots:
            IMP.atom.destroy(prot)
1698 
1699  def set_crosslinks(
1700  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1701  mapping=None, filter_label=None,
1702  # provide a list of rmf base names to filter the stat file
1703  filter_rmf_file_names=None, external_csv_data_file=None,
1704  external_csv_data_file_unique_id_key="Unique ID"):
1705 
1706  # example key ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None # noqa: E501
1707  # mapping is a dictionary that maps standard keywords to entry
1708  # positions in the key string
1709  # confidence class is a filter that
1710  # external datafile is a datafile that contains further information
1711  # on the cross-links
1712  # it will use the unique id to create the dictionary keys
1713 
1714  po = IMP.pmi.output.ProcessOutput(data_file)
1715  keys = po.get_keys()
1716 
1717  xl_keys = [k for k in keys if search_label in k]
1718 
1719  if filter_rmf_file_names is not None:
1720  rmf_file_key = "local_rmf_file_name"
1721  fs = po.get_fields(xl_keys+[rmf_file_key])
1722  else:
1723  fs = po.get_fields(xl_keys)
1724 
1725  # this dictionary stores the occurency of given cross-links
1726  self.cross_link_frequency = {}
1727 
1728  # this dictionary stores the series of distances for given cross-linked
1729  # residues
1730  self.cross_link_distances = {}
1731 
1732  # this dictionary stores the series of distances for given cross-linked
1733  # residues
1734  self.cross_link_distances_unique = {}
1735 
1736  if external_csv_data_file is not None:
1737  # this dictionary stores the further information on cross-links
1738  # labeled by unique ID
1739  self.external_csv_data = {}
1740  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1741 
1742  for xl in xldb:
1743  self.external_csv_data[
1744  xl[external_csv_data_file_unique_id_key]] = xl
1745 
1746  # this list keeps track the tuple of cross-links and sample
1747  # so that we don't count twice the same cross-linked residues in the
1748  # same sample
1749  cross_link_frequency_list = []
1750 
1751  self.unique_cross_link_list = []
1752 
1753  for key in xl_keys:
1754  print(key)
1755  keysplit = key.replace("_", " ").replace("-", " ").replace(
1756  ":", " ").split()
1757 
1758  if filter_label is not None:
1759  if filter_label not in keysplit:
1760  continue
1761 
1762  if mapping is None:
1763  r1 = int(keysplit[5])
1764  c1 = keysplit[6]
1765  r2 = int(keysplit[7])
1766  c2 = keysplit[8]
1767  try:
1768  confidence = keysplit[12]
1769  except: # noqa: E722
1770  confidence = '0.0'
1771  try:
1772  unique_identifier = keysplit[3]
1773  except: # noqa: E722
1774  unique_identifier = '0'
1775  else:
1776  r1 = int(keysplit[mapping["Residue1"]])
1777  c1 = keysplit[mapping["Protein1"]]
1778  r2 = int(keysplit[mapping["Residue2"]])
1779  c2 = keysplit[mapping["Protein2"]]
1780  try:
1781  confidence = keysplit[mapping["Confidence"]]
1782  except: # noqa: E722
1783  confidence = '0.0'
1784  try:
1785  unique_identifier = keysplit[mapping["Unique Identifier"]]
1786  except: # noqa: E722
1787  unique_identifier = '0'
1788 
1789  self.crosslinkedprots.add(c1)
1790  self.crosslinkedprots.add(c2)
1791 
1792  # construct the list of distances corresponding to the input rmf
1793  # files
1794 
1795  dists = []
1796  if filter_rmf_file_names is not None:
1797  for n, d in enumerate(fs[key]):
1798  if fs[rmf_file_key][n] in filter_rmf_file_names:
1799  dists.append(float(d))
1800  else:
1801  dists = [float(f) for f in fs[key]]
1802 
1803  # check if the input confidence class corresponds to the
1804  # one of the cross-link
1805 
1806  mdist = self.median(dists)
1807 
1808  stdv = np.std(np.array(dists))
1809  if self.mindist > mdist:
1810  self.mindist = mdist
1811  if self.maxdist < mdist:
1812  self.maxdist = mdist
1813 
1814  # calculate the frequency of unique cross-links within the same
1815  # sample
1816  if (r1, c1, r2, c2, mdist) not in cross_link_frequency_list:
1817  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1818  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1819  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1820  else:
1821  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1822  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1823  cross_link_frequency_list.append((r1, c1, r2, c2))
1824  cross_link_frequency_list.append((r2, c2, r1, c1))
1825  self.unique_cross_link_list.append(
1826  (r1, c1, r2, c2, mdist))
1827 
1828  if (r1, c1, r2, c2) not in self.cross_link_distances:
1829  self.cross_link_distances[(
1830  r1, c1, r2, c2, mdist, confidence)] = dists
1831  self.cross_link_distances[(
1832  r2, c2, r1, c1, mdist, confidence)] = dists
1833  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1834  else:
1835  self.cross_link_distances[(
1836  r2, c2, r1, c1, mdist, confidence)] += dists
1837  self.cross_link_distances[(
1838  r1, c1, r2, c2, mdist, confidence)] += dists
1839 
1840  self.crosslinks.append(
1841  (r1, c1, r2, c2, mdist, stdv, confidence, unique_identifier,
1842  'original'))
1843  self.crosslinks.append(
1844  (r2, c2, r1, c1, mdist, stdv, confidence, unique_identifier,
1845  'reversed'))
1846 
1847  self.cross_link_frequency_inverted = {}
1848  for xl in self.unique_cross_link_list:
1849  (r1, c1, r2, c2, mdist) = xl
1850  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1851  if frequency not in self.cross_link_frequency_inverted:
1852  self.cross_link_frequency_inverted[
1853  frequency] = [(r1, c1, r2, c2)]
1854  else:
1855  self.cross_link_frequency_inverted[
1856  frequency].append((r1, c1, r2, c2))
1857 
1858  # -------------
1859 
def median(self, mylist):
    """Return the median of a non-empty list of numbers.

    For an even number of elements the mean of the two central
    values is returned.
    @param mylist non-empty list of numeric values
    @return the median value
    """
    sorts = sorted(mylist)
    length = len(sorts)
    if length == 1:
        return mylist[0]
    # Integer division is required here: under Python 3, length / 2
    # is a float and raises TypeError when used as a list index.
    if not length % 2:
        return (sorts[length // 2] + sorts[length // 2 - 1]) / 2.0
    return sorts[length // 2]
1869 
def set_threshold(self, threshold):
    """Set the distance threshold used by colormap() to classify
    cross-links as satisfied or violated."""
    self.threshold = threshold
1872 
def set_tolerance(self, tolerance):
    """Set the tolerance band around the threshold used by colormap()."""
    self.tolerance = tolerance
1875 
def colormap(self, dist):
    """Map a cross-link distance to a color name.

    @param dist the distance to classify
    @return "Green" when dist is below threshold - tolerance,
            "Orange" when dist is at or above threshold + tolerance,
            "Red" otherwise (inside the tolerance band).
    """
    lower_bound = self.threshold - self.tolerance
    upper_bound = self.threshold + self.tolerance
    if dist < lower_bound:
        return "Green"
    if dist >= upper_bound:
        return "Orange"
    return "Red"
1883 
def write_cross_link_database(self, filename, format='csv'):
    """Export all 'original' cross-links to a CSV database file.

    Each cross-link is stored twice internally (original + reversed);
    only the 'original' entry is written. Columns from the external
    csv database, if loaded, are appended in sorted order.

    @param filename name of the output CSV file
    @param format only 'csv' is supported (kept for API compatibility)
    """
    import csv

    fieldnames = [
        "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
        "Median Distance", "Standard Deviation", "Confidence",
        "Frequency", "Arrangement"]

    if self.external_csv_data is not None:
        keys = list(self.external_csv_data.keys())
        innerkeys = list(self.external_csv_data[keys[0]].keys())
        innerkeys.sort()
        fieldnames += innerkeys

    # Use a context manager so the file is flushed and closed even if
    # a row fails to serialize; the original leaked the file handle.
    with open(filename, "w") as fh:
        dw = csv.DictWriter(fh, delimiter=',', fieldnames=fieldnames)
        dw.writeheader()
        for xl in self.crosslinks:
            (r1, c1, r2, c2, mdist, stdv, confidence,
             unique_identifier, descriptor) = xl
            if descriptor == 'original':
                outdict = {}
                outdict["Unique ID"] = unique_identifier
                outdict["Protein1"] = c1
                outdict["Protein2"] = c2
                outdict["Residue1"] = r1
                outdict["Residue2"] = r2
                outdict["Median Distance"] = mdist
                outdict["Standard Deviation"] = stdv
                outdict["Confidence"] = confidence
                outdict["Frequency"] = self.cross_link_frequency[
                    (r1, c1, r2, c2)]
                if c1 == c2:
                    arrangement = "Intra"
                else:
                    arrangement = "Inter"
                outdict["Arrangement"] = arrangement
                if self.external_csv_data is not None:
                    outdict.update(
                        self.external_csv_data[unique_identifier])

                dw.writerow(outdict)
1927 
def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
         no_confidence_info=False, filter=None, layout="whole",
         crosslinkedonly=False, filename=None, confidence_classes=None,
         alphablend=0.1, scale_symbol_size=1.0, gap_between_components=0,
         rename_protein_map=None):
    """Plot the 2D cross-link / contact map.

    @param prot_listx proteins shown on the x axis (default: all,
           sorted; only cross-linked ones if crosslinkedonly is True)
    @param prot_listy proteins shown on the y axis (same default)
    @param no_dist_info if True plot the cross-links as gray spots
           instead of coloring them by distance
    @param no_confidence_info if True use one marker size for all
           confidence classes
    @param filter tuple (keyword, relationship, value); the keyword is
           looked up in the external csv database and compared with the
           value using the relationship ">", "<" or "==",
           e.g. ("ID_Score", ">", 40)
    @param layout "whole" (default), "lowerdiagonal" or "upperdiagonal"
           to print only one triangle of the map
    @param crosslinkedonly if True plot only components that have
           cross-links
    @param filename if given, save to filename + ".pdf"; otherwise show
           the plot interactively
    @param confidence_classes if given, plot only cross-links whose
           confidence is in this list
    @param alphablend transparency of the cross-link markers
    @param scale_symbol_size rescale the cross-link marker size
    @param gap_between_components number of blank residues drawn
           between components
    @param rename_protein_map dictionary mapping protein names to the
           labels shown on the axes
    """
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111)

    ax.set_xticks([])
    ax.set_yticks([])

    # set the list of proteins on the x axis
    if prot_listx is None:
        if crosslinkedonly:
            prot_listx = list(self.crosslinkedprots)
        else:
            prot_listx = list(self.prot_length_dict.keys())
        prot_listx.sort()

    nresx = gap_between_components + \
        sum([self.prot_length_dict[name]
             + gap_between_components for name in prot_listx])

    # set the list of proteins on the y axis
    if prot_listy is None:
        if crosslinkedonly:
            prot_listy = list(self.crosslinkedprots)
        else:
            prot_listy = list(self.prot_length_dict.keys())
        prot_listy.sort()

    nresy = gap_between_components + \
        sum([self.prot_length_dict[name]
             + gap_between_components for name in prot_listy])

    # residue offset (start) and end position of each protein on x
    resoffsetx = {}
    resendx = {}
    res = gap_between_components
    for prot in prot_listx:
        resoffsetx[prot] = res
        res += self.prot_length_dict[prot]
        resendx[prot] = res
        res += gap_between_components

    # residue offset (start) and end position of each protein on y
    resoffsety = {}
    resendy = {}
    res = gap_between_components
    for prot in prot_listy:
        resoffsety[prot] = res
        res += self.prot_length_dict[prot]
        resendy[prot] = res
        res += gap_between_components

    # offsets along the diagonal, used for rectangular layouts
    resoffsetdiagonal = {}
    res = gap_between_components
    for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
        resoffsetdiagonal[prot] = res
        res += self.prot_length_dict[prot]
        res += gap_between_components

    # plot protein boundaries
    xticks = []
    xlabels = []
    for prot in prot_listx:
        res = resoffsetx[prot]
        end = resendx[prot]
        for proty in prot_listy:
            resy = resoffsety[proty]
            endy = resendy[proty]
            ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
            ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
        xticks.append((float(res) + float(end)) / 2)
        if rename_protein_map is not None:
            if prot in rename_protein_map:
                prot = rename_protein_map[prot]
        xlabels.append(prot)

    yticks = []
    ylabels = []
    for prot in prot_listy:
        res = resoffsety[prot]
        end = resendy[prot]
        for protx in prot_listx:
            resx = resoffsetx[protx]
            endx = resendx[protx]
            ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
            ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
        yticks.append((float(res) + float(end)) / 2)
        if rename_protein_map is not None:
            if prot in rename_protein_map:
                prot = rename_protein_map[prot]
        ylabels.append(prot)

    # plot the contact map
    if self.contactmap is not None:
        tmp_array = np.zeros((nresx, nresy))

        for px in prot_listx:
            for py in prot_listy:
                # bug fix: the x offset was previously looked up in
                # resoffsety, which misplaces (or silently skips, via
                # the bare except below) the block whenever the x and
                # y protein lists differ
                resx = resoffsetx[px]
                lengx = resendx[px] - 1
                resy = resoffsety[py]
                lengy = resendy[py] - 1
                indexes_x = self.index_dictionary[px]
                minx = min(indexes_x)
                maxx = max(indexes_x)
                indexes_y = self.index_dictionary[py]
                miny = min(indexes_y)
                maxy = max(indexes_y)

                try:
                    tmp_array[
                        resx:lengx,
                        resy:lengy] = self.contactmap[
                        minx:maxx,
                        miny:maxy]
                except:  # noqa: E722
                    # shape mismatches are skipped, as before
                    continue

        ax.imshow(tmp_array,
                  cmap=cm.binary,
                  origin='lower',
                  interpolation='nearest')

    ax.set_xticks(xticks)
    ax.set_xticklabels(xlabels, rotation=90)
    ax.set_yticks(yticks)
    ax.set_yticklabels(ylabels)
    ax.set_xlim(0, nresx)
    ax.set_ylim(0, nresy)

    # set the cross-links
    already_added_xls = []

    for xl in self.crosslinks:

        (r1, c1, r2, c2, mdist, stdv, confidence,
         unique_identifier, descriptor) = xl

        if confidence_classes is not None:
            if confidence not in confidence_classes:
                continue

        # skip cross-links whose proteins are not on the axes
        try:
            pos1 = r1 + resoffsetx[c1]
        except:  # noqa: E722
            continue
        try:
            pos2 = r2 + resoffsety[c2]
        except:  # noqa: E722
            continue

        if filter is not None:
            xldb = self.external_csv_data[unique_identifier]
            xldb.update({"Distance": mdist})
            xldb.update({"Distance_stdv": stdv})

            if filter[1] == ">":
                if float(xldb[filter[0]]) <= float(filter[2]):
                    continue

            if filter[1] == "<":
                if float(xldb[filter[0]]) >= float(filter[2]):
                    continue

            if filter[1] == "==":
                if float(xldb[filter[0]]) != float(filter[2]):
                    continue

        # diagonal positions are used to select one triangle of the
        # plot when the layout is rectangular
        pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
        pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]

        if layout == 'lowerdiagonal':
            if pos_for_diagonal1 <= pos_for_diagonal2:
                continue
        if layout == 'upperdiagonal':
            if pos_for_diagonal1 >= pos_for_diagonal2:
                continue

        already_added_xls.append((r1, c1, r2, c2))

        if not no_confidence_info:
            if confidence == '0.01':
                markersize = 14 * scale_symbol_size
            elif confidence == '0.05':
                markersize = 9 * scale_symbol_size
            elif confidence == '0.1':
                markersize = 6 * scale_symbol_size
            else:
                markersize = 15 * scale_symbol_size
        else:
            markersize = 5 * scale_symbol_size

        if not no_dist_info:
            color = self.colormap(mdist)
        else:
            color = "gray"

        ax.plot(
            [pos1],
            [pos2],
            'o',
            c=color,
            alpha=alphablend,
            markersize=markersize)

    fig.set_size_inches(0.004 * nresx, 0.004 * nresy)

    for i in ax.spines.values():
        i.set_linewidth(2.0)

    if filename:
        plt.savefig(filename + ".pdf", dpi=300, transparent="False")
    else:
        plt.show()
2175 
def get_frequency_statistics(self, prot_list, prot_list2=None):
    """Print satisfied/violated cross-link frequency histograms.

    A unique cross-link is counted as violated when its median
    distance exceeds 35 A, satisfied otherwise; histograms are keyed
    by the cross-link frequency.

    @param prot_list proteins to consider; when prot_list2 is None,
           both ends of the cross-link must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    @return the total number of cross-links counted (also printed);
            the original implementation only printed it
    """
    violated_histogram = {}
    satisfied_histogram = {}
    unique_cross_links = []

    for xl in self.unique_cross_link_list:
        (r1, c1, r2, c2, mdist) = xl

        # here we filter by the protein
        if prot_list2 is None:
            if c1 not in prot_list or c2 not in prot_list:
                continue
        else:
            if not ((c1 in prot_list and c2 in prot_list2)
                    or (c1 in prot_list2 and c2 in prot_list)):
                continue

        frequency = self.cross_link_frequency[(r1, c1, r2, c2)]

        if (r1, c1, r2, c2) not in unique_cross_links:
            histogram = (violated_histogram if mdist > 35.0
                         else satisfied_histogram)
            histogram[frequency] = histogram.get(frequency, 0) + 1
            # record both orientations so the reversed entry is not
            # counted a second time
            unique_cross_links.append((r1, c1, r2, c2))
            unique_cross_links.append((r2, c2, r1, c1))

    print("# satisfied")

    total_number_of_crosslinks = 0

    for i in satisfied_histogram:
        if i in violated_histogram:
            print(i, violated_histogram[i] + satisfied_histogram[i])
        else:
            print(i, satisfied_histogram[i])
        total_number_of_crosslinks += i * satisfied_histogram[i]

    print("# violated")

    for i in violated_histogram:
        print(i, violated_histogram[i])
        total_number_of_crosslinks += i * violated_histogram[i]

    print(total_number_of_crosslinks)
    return total_number_of_crosslinks
2237 
def print_cross_link_binary_symbols(self, prot_list, prot_list2=None):
    """Print per-model cross-link satisfaction counts.

    For every 'original' cross-link between the selected proteins, a
    per-model binary string is built (1 = distance below 35 A). The
    counts are then printed per model, broken down by confidence class
    ("0.01", "0.05", "0.1"), followed by the per-class averages over
    all models.

    @param prot_list proteins to consider; when prot_list2 is None,
           both ends of the cross-link must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    """
    binary_rows = []
    confidence_list = []
    for xl in self.crosslinks:
        (r1, c1, r2, c2, mdist, stdv, confidence,
         unique_identifier, descriptor) = xl

        # keep only cross-links between the requested proteins
        if prot_list2 is None:
            if c1 not in prot_list or c2 not in prot_list:
                continue
        else:
            in_12 = c1 in prot_list and c2 in prot_list2
            in_21 = c1 in prot_list2 and c2 in prot_list
            if not (in_12 or in_21):
                continue

        # each link is stored twice; use the 'original' entry only
        if descriptor != "original":
            continue

        confidence_list.append(confidence)

        dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
        binary_rows.append([1 if d < 35 else 0 for d in dists])

    # transpose: one row per model, one column per cross-link
    matrix = list(zip(*binary_rows))

    satisfied_high_sum = 0
    satisfied_mid_sum = 0
    satisfied_low_sum = 0
    total_satisfied_sum = 0
    for k, model_row in enumerate(matrix):
        satisfied_high = total_high = 0
        satisfied_mid = total_mid = 0
        satisfied_low = total_low = 0
        total_satisfied = total = 0
        for n, b in enumerate(model_row):
            conf = confidence_list[n]
            if conf == "0.01":
                total_high += 1
                if b == 1:
                    satisfied_high += 1
                    satisfied_high_sum += 1
            elif conf == "0.05":
                total_mid += 1
                if b == 1:
                    satisfied_mid += 1
                    satisfied_mid_sum += 1
            elif conf == "0.1":
                total_low += 1
                if b == 1:
                    satisfied_low += 1
                    satisfied_low_sum += 1
            if b == 1:
                total_satisfied += 1
                total_satisfied_sum += 1
            total += 1
        print(k, satisfied_high, total_high)
        print(k, satisfied_mid, total_mid)
        print(k, satisfied_low, total_low)
        print(k, total_satisfied, total)
    print(float(satisfied_high_sum) / len(matrix))
    print(float(satisfied_mid_sum) / len(matrix))
    print(float(satisfied_low_sum) / len(matrix))
2315 # ------------
2316 
def get_unique_crosslinks_statistics(self, prot_list, prot_list2=None):
    """Print satisfaction statistics for unique cross-links.

    A cross-link is satisfied when its median distance is at most
    35 A. Counts are printed per confidence class, then a per-model
    binary matrix is compared against the median-satisfaction string:
    a model counts as fully satisfied when it satisfies every
    cross-link that is satisfied at the median.

    @param prot_list proteins to consider; when prot_list2 is None,
           both ends of the cross-link must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    """
    print(prot_list)
    print(prot_list2)
    satisfied_high = total_high = 0
    satisfied_mid = total_mid = 0
    satisfied_low = total_low = 0
    total = 0
    binary_rows = []
    satisfied_string = []
    for xl in self.crosslinks:
        (r1, c1, r2, c2, mdist, stdv, confidence,
         unique_identifier, descriptor) = xl

        # keep only cross-links between the requested proteins
        if prot_list2 is None:
            if c1 not in prot_list or c2 not in prot_list:
                continue
        else:
            pair_ok = ((c1 in prot_list and c2 in prot_list2)
                       or (c1 in prot_list2 and c2 in prot_list))
            if not pair_ok:
                continue

        # each link is stored twice; use the 'original' entry only
        if descriptor != "original":
            continue

        total += 1
        is_satisfied = mdist <= 35
        if confidence == "0.01":
            total_high += 1
            if is_satisfied:
                satisfied_high += 1
        if confidence == "0.05":
            total_mid += 1
            if is_satisfied:
                satisfied_mid += 1
        if confidence == "0.1":
            total_low += 1
            if is_satisfied:
                satisfied_low += 1
        satisfied_string.append(1 if is_satisfied else 0)

        dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
        binary_rows.append([1 if d < 35 else 0 for d in dists])

    print("unique satisfied_high/total_high", satisfied_high, "/",
          total_high)
    print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
    print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
    print("total", total)

    # transpose: one row per model, one column per cross-link
    matrix = list(zip(*binary_rows))
    satisfied_models = 0
    satstr = "".join("*" if b == 1 else "-" for b in satisfied_string)

    for model_row in matrix:
        all_satisfied = True
        string = ""
        for n, b in enumerate(model_row):
            string += "1" if b == 1 else "0"
            # a model fails only where it breaks a cross-link that is
            # satisfied at the median
            if b == 0 and satisfied_string[n] == 1:
                all_satisfied = False
        if all_satisfied:
            satisfied_models += 1
        print(string)
        print(satstr, all_satisfied)
    print("models that satisfies the median satisfied cross-links/total "
          "models", satisfied_models, len(matrix))
2415 
def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
                                            prot_list2=None):
    """Plot a cross-link (rows) by model (columns) distance matrix.

    Rows are ordered by their total distance over all models.

    @param figurename name of the output image file
    @param prot_list proteins to consider; when prot_list2 is None,
           both ends of the cross-link must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    """
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pylab as pl

    tmp_matrix = []
    for kw in self.cross_link_distances_unique:
        (r1, c1, r2, c2) = kw
        dists = self.cross_link_distances_unique[kw]

        if prot_list2 is None:
            if c1 not in prot_list:
                continue
            if c2 not in prot_list:
                continue
        else:
            if c1 in prot_list and c2 in prot_list2:
                pass
            elif c1 in prot_list2 and c2 in prot_list:
                pass
            else:
                continue
        # Work on a copy: the original appended sum(dists) to the
        # stored list to sort by it, corrupting
        # self.cross_link_distances_unique on every call.
        tmp_matrix.append(list(dists))

    if not tmp_matrix:
        print("plot_matrix_cross_link_distances_unique: "
              "no cross-links selected")
        return

    # order the rows by their total distance over all models
    tmp_matrix.sort(key=sum)

    matrix = np.array(tmp_matrix)

    print(matrix)

    fig = pl.figure()
    ax = fig.add_subplot(211)

    cax = ax.imshow(matrix, interpolation='nearest')
    fig.colorbar(cax)
    pl.savefig(figurename, dpi=300)
    pl.show()
2462 
def plot_bars(
        self,
        filename,
        prots1,
        prots2,
        nxl_per_row=20,
        arrangement="inter",
        confidence_input="None"):
    """Box-plot the distance distributions of selected cross-links.

    Cross-links are sorted by median distance and split into rows of
    at most nxl_per_row entries; one plot file is written per row.

    @param filename base name of the output file(s); the chunk index
           is appended as ".<n>"
    @param prots1 allowed proteins for the first cross-linked residue
    @param prots2 allowed proteins for the second cross-linked residue
    @param nxl_per_row maximum number of cross-links per plot row
    @param arrangement "inter" skips intra-protein links, "intra"
           skips inter-protein links
    @param confidence_input only plot cross-links with exactly this
           confidence value
    """
    data = []
    for xl in self.cross_link_distances:
        (r1, c1, r2, c2, mdist, confidence) = xl
        if c1 in prots1 and c2 in prots2:
            if arrangement == "inter" and c1 == c2:
                continue
            if arrangement == "intra" and c1 != c2:
                continue
            if confidence_input == confidence:
                label = str(c1) + ":" + str(r1) + \
                    "-" + str(c2) + ":" + str(r2)
                values = self.cross_link_distances[xl]
                frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
                data.append((label, values, mdist, frequency))

    sort_by_dist = sorted(data, key=lambda tup: tup[2])
    sort_by_dist = list(zip(*sort_by_dist))
    values = sort_by_dist[1]
    positions = list(range(len(values)))
    labels = sort_by_dist[0]
    frequencies = list(map(float, sort_by_dist[3]))
    frequencies = [f * 10.0 for f in frequencies]

    nchunks = int(float(len(values)) / nxl_per_row)
    values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
    positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
        positions,
        nchunks)
    frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
        frequencies,
        nchunks)
    labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)

    for n, v in enumerate(values_chunks):
        p = positions_chunks[n]
        f = frequencies_chunks[n]
        # restore the plotting call that was dropped from this copy
        # of the file
        IMP.pmi.output.plot_fields_box_plots(
            filename + "." + str(n), v, p, f,
            valuename="Distance (Ang)",
            positionname="Unique " + arrangement + " Crosslinks",
            xlabels=labels_chunks[n])
2513 
def crosslink_distance_histogram(self, filename,
                                 prot_list=None,
                                 prot_list2=None,
                                 confidence_classes=None,
                                 bins=40,
                                 color='#66CCCC',
                                 yplotrange=None,
                                 format="png",
                                 normalized=False):
    """Histogram the median distances of the selected cross-links.

    @param filename name of the output plot file
    @param prot_list proteins to consider (default: all); when
           prot_list2 is None, both ends must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    @param confidence_classes if given, keep only cross-links whose
           confidence is in this list
    @param bins number of histogram bins
    @param color histogram color
    @param yplotrange y axis range (default [0, 1]); a None sentinel
           replaces the original mutable default argument
    @param format output image format
    @param normalized whether to normalize the histogram
    """
    if yplotrange is None:
        yplotrange = [0, 1]
    if prot_list is None:
        prot_list = list(self.prot_length_dict.keys())

    distances = []
    for xl in self.crosslinks:
        (r1, c1, r2, c2, mdist, stdv, confidence,
         unique_identifier, descriptor) = xl

        if confidence_classes is not None:
            if confidence not in confidence_classes:
                continue

        if prot_list2 is None:
            if c1 not in prot_list:
                continue
            if c2 not in prot_list:
                continue
        else:
            if c1 in prot_list and c2 in prot_list2:
                pass
            elif c1 in prot_list2 and c2 in prot_list:
                pass
            else:
                continue

        distances.append(mdist)

    # restore the plotting call that was dropped from this copy of
    # the file
    IMP.pmi.output.plot_field_histogram(
        filename, distances, valuename="C-alpha C-alpha distance [Ang]",
        bins=bins, color=color,
        format=format,
        reference_xline=35.0,
        yplotrange=yplotrange, normalized=normalized)
2556 
def scatter_plot_xl_features(self, filename,
                             feature1=None,
                             feature2=None,
                             prot_list=None,
                             prot_list2=None,
                             yplotrange=None,
                             reference_ylines=None,
                             distance_color=True,
                             format="png"):
    """Scatter-plot two columns of the external cross-link database.

    The computed "Distance" and "Distance_stdv" values are merged into
    each database record before lookup, so they can be used as
    features too.

    @param filename if given, save as filename + "." + format
    @param feature1 database column plotted on the x axis
    @param feature2 database column plotted on the y axis
    @param prot_list proteins to consider; when prot_list2 is None,
           both ends must be in prot_list
    @param prot_list2 if given, keep cross-links with one end in
           prot_list and the other in prot_list2
    @param yplotrange if given, set the y axis range
    @param reference_ylines if given, draw dashed red horizontal lines
           at these y values
    @param distance_color color each point by its distance via
           colormap(); gray otherwise
    @param format output image format
    """
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    fig = plt.figure(figsize=(10, 10))
    axes = fig.add_subplot(111)

    for xl in self.crosslinks:
        (r1, c1, r2, c2, mdist, stdv, confidence,
         unique_identifier, arrangement) = xl

        # keep only cross-links between the requested proteins
        if prot_list2 is None:
            if c1 not in prot_list or c2 not in prot_list:
                continue
        else:
            pair_ok = ((c1 in prot_list and c2 in prot_list2)
                       or (c1 in prot_list2 and c2 in prot_list))
            if not pair_ok:
                continue

        xldb = self.external_csv_data[unique_identifier]
        xldb.update({"Distance": mdist})
        xldb.update({"Distance_stdv": stdv})

        xvalue = float(xldb[feature1])
        yvalue = float(xldb[feature2])

        point_color = self.colormap(mdist) if distance_color else "gray"

        axes.plot([xvalue], [yvalue], 'o', c=point_color, alpha=0.1,
                  markersize=7)

    if yplotrange is not None:
        axes.set_ylim(yplotrange)
    if reference_ylines is not None:
        for rl in reference_ylines:
            axes.axhline(rl, color='red', linestyle='dashed', linewidth=1)

    if filename:
        plt.savefig(filename + "." + format, dpi=150, transparent="False")

    plt.show()
def get_output
Returns output for IMP.pmi.output.Output object.
Simple 3D transformation class.
Visualization of cross-links.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
double get_drms(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:982
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def plot_field_histogram
Plot a list of histograms from a value list.
Definition: output.py:1804
def plot_fields_box_plots
Plot time series as boxplots.
Definition: output.py:1869
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
Definition: atom/distance.h:49
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: pmi/tools.py:1159
Miscellaneous utilities.
Definition: pmi/tools.py:1
def __init__
Constructor.
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
double get_weighted_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const Floats &weights)
Compute the RMSD (without alignment) taking into account the copy ambiguity.
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
A class to evaluate the precision of an ensemble.
log
Definition: log.py:1
A class to cluster structures.
def get_rmsd_wrt_reference_structure_with_alignment
First align then calculate RMSD.
def __init__
Constructor.
Definition: pmi/Analysis.py:30
Class for sampling a density map from particles.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
def add_structure
Read a structure into the ensemble and store (as coordinates).
DensityMap * multiply(const DensityMap *m1, const DensityMap *m2)
Return a density map for which voxel i contains the result of m1[i]*m2[i].
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def add_subunits_density
Add a frame to the densities.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:678
def scatter_and_gather
Synchronize data over a parallel run.
Definition: pmi/tools.py:542
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
def fill
Add coordinates for a single model.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
def do_cluster
Run K-means clustering.
3D rotation class.
Definition: Rotation3D.h:52
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:509
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def set_reference_structure
Read in a structure used for reference computation.
def get_rmsf
Calculate the residue mean square fluctuations (RMSF).
def get_average_distance_wrt_reference_structure
Compare the structure set to the reference structure.
def get_density
Get the current density for some component name.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
The general base class for IMP exceptions.
Definition: exception.h:48
def get_precision
Evaluate the precision of two named structure groups.
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
def get_moldict_coorddict
return data structure for the RMSD calculation
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def get_particles_at_resolution_ten
Get particles at res 10, or any beads, based on the name.
Python classes to represent, score, sample and analyze models.
def add_structures
Read a list of RMFs, supports parallel.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Hierarchies get_leaves(const Selection &h)
double get_drmsd_Q(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1, double threshold)
Definition: atom/distance.h:85
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: pmi/tools.py:499
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27