IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 import IMP
7 import IMP.algebra
8 import IMP.em
9 import IMP.pmi
10 import IMP.pmi.tools
11 import IMP.pmi.output
12 import IMP.rmf
13 import RMF
14 import IMP.pmi.analysis
15 from operator import itemgetter
16 from math import log, sqrt
17 import itertools
18 import numpy as np
19 
20 
21 class Alignment:
22  """Performs alignment and RMSD calculation for two sets of coordinates
23 
24  The class also takes into account non-equal stoichiometry of the proteins.
25  If this is the case, the protein names of proteins in multiple copies
26  should be specified in the following form:
27  nameA..1, nameA..2 (note two dots).
28  """
29 
30  def __init__(self, template, query, weights=None):
31  """Constructor.
32  @param query {'p1':coords(L,3), 'p2':coords(L,3)}
33  @param template {'p1':coords(L,3), 'p2':coords(L,3)}
34  @param weights optional weights for each set of coordinates
35  """
36  self.query = query
37  self.template = template
38  self.weights = weights
39 
40  if len(self.query.keys()) != len(self.template.keys()):
41  raise ValueError('''the number of proteins
42  in template and query does not match!''')
43 
44  def permute(self):
45  # get unique protein names for each protein
46  # this is, unfortunately, expecting that names are all 'Molname..X'
47  # where X is different for each copy.
48  self.proteins = sorted(self.query.keys())
49  prots_uniq = [i.split('..')[0] for i in self.proteins]
50 
51  # for each unique name, store list of permutations
52  # e.g. for keys A..1,A..2 store P[A] = [[A..1,A..2],[A..2,A..1]]
53  # then store the product: [[[A1,A2],[B1,B2]],[[A1,A2],[B2,B1]],
54  # [[A2,A1],[B1,B2]],[[A2,A1],[B2,B1]]]
55  P = {}
56  for p in prots_uniq:
57  copies = [i for i in self.proteins if i.split('..')[0] == p]
58  prmts = list(itertools.permutations(copies, len(copies)))
59  P[p] = prmts
60  self.P = P
61  self.Product = list(itertools.product(*P.values()))
62 
63  def get_rmsd(self):
64 
65  self.permute()
66 
67  template_xyz = []
68  weights = []
69  torder = sum([list(i) for i in self.Product[0]], [])
70  for t in torder:
71  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
72  if self.weights is not None:
73  weights += [i for i in self.weights[t]]
74 
75  self.rmsd = 10000000000.
76  for comb in self.Product:
77 
78  order = sum([list(i) for i in comb], [])
79  query_xyz = []
80  for p in order:
81  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
82 
83  if self.weights is not None:
85  template_xyz, query_xyz, weights)
86  else:
87  dist = IMP.algebra.get_rmsd(template_xyz, query_xyz)
88  if dist < self.rmsd:
89  self.rmsd = dist
90  return self.rmsd
91 
92  def align(self):
93  from scipy.spatial.distance import cdist
94 
95  self.permute()
96 
97  # create flat coordinate list from template in standard order
98  # then loop through the permutations and try to align and get RMSD
99  # should allow you to try all mappings within each protein
100  template_xyz = []
101  torder = sum([list(i) for i in self.Product[0]], [])
102  for t in torder:
103  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
104  # template_xyz = np.array(template_xyz)
105  self.rmsd, Transformation = 10000000000., ''
106 
107  # then for each permutation, get flat list of coords and get RMSD
108  for comb in self.Product:
109  order = sum([list(i) for i in comb], [])
110  query_xyz = []
111  for p in order:
112  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
113 
114  if len(template_xyz) != len(query_xyz):
115  raise ValueError('''the number of coordinates
116  in template and query does not match!''')
117 
118  transformation = \
120  query_xyz, template_xyz)
121  query_xyz_tr = [transformation.get_transformed(n)
122  for n in query_xyz]
123 
124  dist = sqrt(
125  sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2))
126  / len(template_xyz))
127  if dist < self.rmsd:
128  self.rmsd = dist
129  Transformation = transformation
130 
131  # return the transformation
132  return (self.rmsd, Transformation)
133 
134 
135 # TEST for the alignment ###
136 """
137 Proteins = {'a..1':np.array([np.array([-1.,1.])]),
138  'a..2':np.array([np.array([1.,1.,])]),
139  'a..3':np.array([np.array([-2.,1.])]),
140  'b':np.array([np.array([0.,-1.])]),
141  'c..1':np.array([np.array([-1.,-1.])]),
142  'c..2':np.array([np.array([1.,-1.])]),
143  'd':np.array([np.array([0.,0.])]),
144  'e':np.array([np.array([0.,1.])])}
145 
146 Ali = Alignment(Proteins, Proteins)
147 Ali.permute()
148 if Ali.get_rmsd() == 0.0: print 'successful test!'
149 else: print 'ERROR!'; exit()
150 """
151 
152 
153 # ----------------------------------
154 class Violations:
155 
156  def __init__(self, filename):
157 
158  self.violation_thresholds = {}
159  self.violation_counts = {}
160 
161  data = open(filename)
162  D = data.readlines()
163  data.close()
164 
165  for d in D:
166  d = d.strip().split()
167  self.violation_thresholds[d[0]] = float(d[1])
168 
169  def get_number_violated_restraints(self, rsts_dict):
170  num_violated = 0
171  for rst in self.violation_thresholds:
172  if rst not in rsts_dict:
173  continue # print rst;
174  if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
175  num_violated += 1
176  if rst not in self.violation_counts:
177  self.violation_counts[rst] = 1
178  else:
179  self.violation_counts[rst] += 1
180  return num_violated
181 
182 
184  """A class to cluster structures.
185  Uses scipy's cdist function to compute distance matrices
186  and sklearn's kmeans clustering module.
187  """
188  def __init__(self, rmsd_weights=None):
189  """Constructor.
190  @param rmsd_weights Flat list of weights for each particle
191  (if they're coarse)
192  """
193  try:
194  from mpi4py import MPI
195  self.comm = MPI.COMM_WORLD
196  self.rank = self.comm.Get_rank()
197  self.number_of_processes = self.comm.size
198  except ImportError:
199  self.number_of_processes = 1
200  self.rank = 0
201  self.all_coords = {}
202  self.structure_cluster_ids = None
203  self.tmpl_coords = None
204  self.rmsd_weights = rmsd_weights
205 
206  def set_template(self, part_coords):
207  self.tmpl_coords = part_coords
208 
209  def fill(self, frame, Coords):
210  """Add coordinates for a single model."""
211  self.all_coords[frame] = Coords
212 
213  def dist_matrix(self):
214  self.model_list_names = list(self.all_coords.keys())
215  self.model_indexes = list(range(len(self.model_list_names)))
216  self.model_indexes_dict = dict(
217  list(zip(self.model_list_names, self.model_indexes)))
218  model_indexes_unique_pairs = \
219  list(itertools.combinations(self.model_indexes, 2))
220 
221  my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
222  model_indexes_unique_pairs,
223  self.number_of_processes)[self.rank]
224 
225  print("process %s assigned with %s pairs"
226  % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
227 
228  (raw_distance_dict, self.transformation_distance_dict) = \
229  self.matrix_calculation(self.all_coords, self.tmpl_coords,
230  my_model_indexes_unique_pairs)
231 
232  if self.number_of_processes > 1:
233  raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
234  raw_distance_dict)
235  pickable_transformations = \
236  self.get_pickable_transformation_distance_dict()
237  pickable_transformations = IMP.pmi.tools.scatter_and_gather(
238  pickable_transformations)
239  self.set_transformation_distance_dict_from_pickable(
240  pickable_transformations)
241 
242  self.raw_distance_matrix = np.zeros(
243  (len(self.model_list_names), len(self.model_list_names)))
244  for item in raw_distance_dict:
245  (f1, f2) = item
246  self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
247  self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
248 
249  def get_dist_matrix(self):
250  return self.raw_distance_matrix
251 
252  def do_cluster(self, number_of_clusters, seed=None):
253  """Run K-means clustering
254  @param number_of_clusters Num means
255  @param seed the random seed
256  """
257  from sklearn.cluster import KMeans
258  if seed is not None:
259  np.random.seed(seed)
260  try:
261  # check whether we have the right version of sklearn
262  kmeans = KMeans(n_clusters=number_of_clusters)
263  except TypeError:
264  # sklearn older than 0.12
265  kmeans = KMeans(k=number_of_clusters)
266  kmeans.fit_predict(self.raw_distance_matrix)
267 
268  self.structure_cluster_ids = kmeans.labels_
269 
270  def get_pickable_transformation_distance_dict(self):
271  pickable_transformations = {}
272  for label in self.transformation_distance_dict:
273  tr = self.transformation_distance_dict[label]
274  trans = tuple(tr.get_translation())
275  rot = tuple(tr.get_rotation().get_quaternion())
276  pickable_transformations[label] = (rot, trans)
277  return pickable_transformations
278 
279  def set_transformation_distance_dict_from_pickable(
280  self,
281  pickable_transformations):
282  self.transformation_distance_dict = {}
283  for label in pickable_transformations:
284  tr = pickable_transformations[label]
285  trans = IMP.algebra.Vector3D(tr[1])
286  rot = IMP.algebra.Rotation3D(tr[0])
287  self.transformation_distance_dict[
288  label] = IMP.algebra.Transformation3D(rot, trans)
289 
290  def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
291  import pickle
292  outf = open(file_name + ".data", 'wb')
293 
294  # to pickle the transformation dictionary
295  # you have to save the arrays correposnding to
296  # the transformations
297 
298  pickable_transformations = \
299  self.get_pickable_transformation_distance_dict()
300  pickle.dump(
301  (self.structure_cluster_ids,
302  self.model_list_names,
303  pickable_transformations),
304  outf)
305 
306  np.save(file_name + ".npy", self.raw_distance_matrix)
307 
308  def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
309  import pickle
310 
311  inputf = open(file_name + ".data", 'rb')
312  (self.structure_cluster_ids, self.model_list_names,
313  pickable_transformations) = pickle.load(inputf)
314  inputf.close()
315 
316  self.raw_distance_matrix = np.load(file_name + ".npy")
317 
318  self.set_transformation_distance_dict_from_pickable(
319  pickable_transformations)
320  self.model_indexes = list(range(len(self.model_list_names)))
321  self.model_indexes_dict = dict(
322  list(zip(self.model_list_names, self.model_indexes)))
323 
324  def plot_matrix(self, figurename="clustermatrix.pdf"):
325  import matplotlib as mpl
326  mpl.use('Agg')
327  import matplotlib.pylab as pl
328  from scipy.cluster import hierarchy as hrc
329 
330  fig = pl.figure(figsize=(10, 8))
331  ax = fig.add_subplot(212)
332  dendrogram = hrc.dendrogram(
333  hrc.linkage(self.raw_distance_matrix),
334  color_threshold=7,
335  no_labels=True)
336  leaves_order = dendrogram['leaves']
337  ax.set_xlabel('Model')
338  ax.set_ylabel('RMSD [Angstroms]')
339 
340  ax2 = fig.add_subplot(221)
341  cax = ax2.imshow(
342  self.raw_distance_matrix[leaves_order,
343  :][:,
344  leaves_order],
345  interpolation='nearest')
346  cb = fig.colorbar(cax)
347  cb.set_label('RMSD [Angstroms]')
348  ax2.set_xlabel('Model')
349  ax2.set_ylabel('Model')
350 
351  pl.savefig(figurename, dpi=300)
352  pl.close(fig)
353 
354  def get_model_index_from_name(self, name):
355  return self.model_indexes_dict[name]
356 
357  def get_cluster_labels(self):
358  # this list
359  return list(set(self.structure_cluster_ids))
360 
361  def get_number_of_clusters(self):
362  return len(self.get_cluster_labels())
363 
364  def get_cluster_label_indexes(self, label):
365  return (
366  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
367  )
368 
369  def get_cluster_label_names(self, label):
370  return (
371  [self.model_list_names[i]
372  for i in self.get_cluster_label_indexes(label)]
373  )
374 
375  def get_cluster_label_average_rmsd(self, label):
376 
377  indexes = self.get_cluster_label_indexes(label)
378 
379  if len(indexes) > 1:
380  sub_distance_matrix = self.raw_distance_matrix[
381  indexes, :][:, indexes]
382  average_rmsd = np.sum(sub_distance_matrix) / \
383  (len(sub_distance_matrix)
384  ** 2 - len(sub_distance_matrix))
385  else:
386  average_rmsd = 0.0
387  return average_rmsd
388 
389  def get_cluster_label_size(self, label):
390  return len(self.get_cluster_label_indexes(label))
391 
392  def get_transformation_to_first_member(
393  self,
394  cluster_label,
395  structure_index):
396  reference = self.get_cluster_label_indexes(cluster_label)[0]
397  return self.transformation_distance_dict[(reference, structure_index)]
398 
399  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
400 
401  model_list_names = list(all_coords.keys())
402  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
403  raw_distance_dict = {}
404  transformation_distance_dict = {}
405  if template_coords is None:
406  do_alignment = False
407  else:
408  do_alignment = True
409  alignment_template_protein_names = list(template_coords.keys())
410 
411  for (f1, f2) in list_of_pairs:
412 
413  if not do_alignment:
414  # here we only get the rmsd,
415  # we need that for instance when you want to cluster
416  # conformations globally, eg the EM map is a reference
418 
419  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
420  for pr in rmsd_protein_names])
421  coords_f2 = {}
422  for pr in rmsd_protein_names:
423  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
424 
425  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
426  rmsd = Ali.get_rmsd()
427 
428  elif do_alignment:
429  # here we actually align the conformations first
430  # and than calculate the rmsd. We need that when the
431  # protein(s) is the reference
432  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
433  for pr in alignment_template_protein_names])
434  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
435  for pr in alignment_template_protein_names])
436 
437  Ali = Alignment(coords_f1, coords_f2)
438  template_rmsd, transformation = Ali.align()
439 
440  # here we calculate the rmsd
441  # we will align two models based n the number of subunits
442  # provided and transform coordinates of model 2 to model 1
443  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
444  for pr in rmsd_protein_names])
445  coords_f2 = {}
446  for pr in rmsd_protein_names:
447  coords_f2[pr] = [transformation.get_transformed(
448  i) for i in all_coords[model_list_names[f2]][pr]]
449 
450  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
451  rmsd = Ali.get_rmsd()
452 
453  raw_distance_dict[(f1, f2)] = rmsd
454  raw_distance_dict[(f2, f1)] = rmsd
455  transformation_distance_dict[(f1, f2)] = transformation
456  transformation_distance_dict[(f2, f1)] = transformation
457 
458  return raw_distance_dict, transformation_distance_dict
459 
460 
461 class RMSD:
462  """Compute the RMSD (without alignment) taking into account the copy
463  ambiguity.
464  To be used with pmi2 hierarchies. Can be used for instance as follows:
465 
466  rmsd = IMP.pmi.analysis.RMSD(
467  hier, hier, [mol.get_name() for mol in mols],
468  dynamic0=True, dynamic1=False)
469  output_objects.append(rmsd)
470 
471  before shuffling the coordinates
472  """
473 
474  def __init__(self, hier0, hier1, molnames, label="None", dynamic0=True,
475  dynamic1=True, metric=IMP.algebra.get_rmsd):
476  """
477  @param hier0 first input hierarchy
478  @param hier1 second input hierarchy
479  @param molnames the names of the molecules used for the RMSD
480  @dynamic0 if True stores the decorators XYZ and coordinates of
481  hier0 can be update. If false coordinates are static
482  (stored in Vector3Ds) and will never be updated
483  @dynamic1 same as above
484  metric what metric should be used
485  """
486  self.moldict0, self.molcoords0, self.mol_XYZs0 = \
487  self.get_moldict_coorddict(hier0, molnames)
488  self.moldict1, self.molcoords1, self.mol_XYZs1 = \
489  self.get_moldict_coorddict(hier1, molnames)
490  self.dynamic0 = dynamic0
491  self.dynamic1 = dynamic1
492  self.metric = metric
493  self.label = label
494 
495  def get_moldict_coorddict(self, hier, molnames):
496  """return data structure for the RMSD calculation"""
497  moldict = {}
498  mol_coords = {}
499  mol_XYZs = {}
500  for mol in IMP.pmi.tools.get_molecules(hier):
501  name = mol.get_name()
502  if name not in molnames:
503  continue
504  parts = True
505  mol_coords[mol] = []
506  mol_XYZs[mol] = []
507  i = 1
508  while parts:
509  sel = IMP.atom.Selection(
510  mol, residue_index=i,
511  representation_type=IMP.atom.BALLS,
512  resolution=1)
513  parts = sel.get_selected_particles()
514  if parts:
515  mol_coords[mol].append(
516  IMP.core.XYZ(parts[0]).get_coordinates())
517  mol_XYZs[mol].append(IMP.core.XYZ(parts[0]))
518  i = i+1
519  if name in moldict:
520  moldict[name].append(mol)
521  else:
522  moldict[name] = [mol]
523  return moldict, mol_coords, mol_XYZs
524 
525  def get_rmsd_and_assigments(self):
526  total_rmsd = 0
527  total_N = 0
528  best_assignments = []
529  rmsd_dict = {}
530  for molname, ref_mols in self.moldict1.items():
531  ref_coords = []
532  for ref_mol in ref_mols:
533  if self.dynamic1:
534  coord1 = [XYZ.get_coordinates()
535  for XYZ in self.mol_XYZs1[ref_mol]]
536  else:
537  coord1 = self.molcoords1[ref_mol]
538  ref_coords += coord1
539 
540  rmsd = []
541  rmf_mols_list = []
542  for rmf_mols in itertools.permutations(self.moldict0[molname]):
543  rmf_coords = []
544  for rmf_mol in rmf_mols:
545  if self.dynamic0:
546  coord0 = [XYZ.get_coordinates()
547  for XYZ in self.mol_XYZs0[rmf_mol]]
548  else:
549  coord0 = self.molcoords0[rmf_mol]
550  rmf_coords += coord0
551 
552  rmsd.append(IMP.algebra.get_rmsd(ref_coords, rmf_coords))
553  rmf_mols_list.append(rmf_mols)
554  m = min(rmsd)
555  rmf_mols_best_order = rmf_mols_list[rmsd.index(m)]
556 
557  for n, (ref_mol, rmf_mol) in enumerate(
558  zip(ref_mols, rmf_mols_best_order)):
559  best_assignments.append((rmf_mol, ref_mol))
560  if self.dynamic0:
561  coord0 = [XYZ.get_coordinates()
562  for XYZ in self.mol_XYZs0[rmf_mol]]
563  else:
564  coord0 = self.molcoords0[rmf_mol]
565 
566  if self.dynamic1:
567  coord1 = [XYZ.get_coordinates()
568  for XYZ in self.mol_XYZs1[ref_mol]]
569  else:
570  coord1 = self.molcoords1[ref_mol]
571  rmsd_pair = self.metric(coord1, coord0)
572  N = len(self.molcoords1[ref_mol])
573  total_N += N
574  total_rmsd += rmsd_pair*rmsd_pair*N
575  rmsd_dict[ref_mol] = rmsd_pair
576  total_rmsd = sqrt(total_rmsd/total_N)
577  return total_rmsd, best_assignments
578 
579  def get_output(self):
580  """Returns output for IMP.pmi.output.Output object"""
581  total_rmsd, best_assignments = self.get_rmsd_and_assigments()
582 
583  assignments_out = []
584  for rmf_mol, ref_mol in best_assignments:
585  ref_name = ref_mol.get_name()
586  ref_copy = IMP.atom.Copy(ref_mol).get_copy_index()
587  rmf_name = rmf_mol.get_name()
588  rmf_copy = IMP.atom.Copy(rmf_mol).get_copy_index()
589  assignments_out.append(rmf_name + "." + str(rmf_copy) + "->" +
590  ref_name + "." + str(ref_copy))
591  return {"RMSD_" + self.label: str(total_rmsd),
592  "RMSD_assignments_" + self.label: str(assignments_out)}
593 
594 
595 class Precision:
596  """A class to evaluate the precision of an ensemble.
597 
598  Also can evaluate the cross-precision of multiple ensembles.
599  Supports MPI for coordinate reading.
600  Recommended procedure:
601  -# initialize object and pass the selection for evaluating precision
602  -# call add_structures() to read in the data (specify group name)
603  -# call get_precision() to evaluate inter/intra precision
604  -# call get_rmsf() to evaluate within-group fluctuations
605  """
606  def __init__(self, model, resolution=1, selection_dictionary={}):
607  """Constructor.
608  @param model The IMP Model
609  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
610  part of the hier name)
611  @param selection_dictionary Dictionary where keys are names for
612  selections and values are selection tuples for scoring
613  precision. "All" is automatically made as well
614  """
615  try:
616  from mpi4py import MPI
617  self.comm = MPI.COMM_WORLD
618  self.rank = self.comm.Get_rank()
619  self.number_of_processes = self.comm.size
620  except ImportError:
621  self.number_of_processes = 1
622  self.rank = 0
623 
624  self.styles = ['pairwise_rmsd', 'pairwise_drmsd_k', 'pairwise_drmsd_Q',
625  'pairwise_drms_k', 'pairwise_rmsd', 'drmsd_from_center']
626  self.style = 'pairwise_drmsd_k'
627  self.structures_dictionary = {}
628  self.reference_structures_dictionary = {}
629  self.prots = []
630  self.protein_names = None
631  self.len_particles_resolution_one = None
632  self.model = model
633  self.rmf_names_frames = {}
634  self.reference_rmf_names_frames = None
635  self.reference_structure = None
636  self.reference_prot = None
637  self.selection_dictionary = selection_dictionary
638  self.threshold = 40.0
639  self.residue_particle_index_map = None
640  self.prots = None
641  if resolution in [1, 10]:
642  self.resolution = resolution
643  else:
644  raise KeyError("Currently only allow resolution 1 or 10")
645 
646  def _get_structure(self, rmf_frame_index, rmf_name):
647  """Read an RMF file and return the particles"""
648  rh = RMF.open_rmf_file_read_only(rmf_name)
649  if not self.prots:
650  print("getting coordinates for frame %i rmf file %s"
651  % (rmf_frame_index, rmf_name))
652  self.prots = IMP.rmf.create_hierarchies(rh, self.model)
653  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
654  else:
655  print("linking coordinates for frame %i rmf file %s"
656  % (rmf_frame_index, rmf_name))
657  IMP.rmf.link_hierarchies(rh, self.prots)
658  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
659  del rh
660 
661  if self.resolution == 1:
662  particle_dict = get_particles_at_resolution_one(self.prots[0])
663  elif self.resolution == 10:
664  particle_dict = get_particles_at_resolution_ten(self.prots[0])
665 
666  protein_names = list(particle_dict.keys())
667  particles_resolution_one = []
668  for k in particle_dict:
669  particles_resolution_one += (particle_dict[k])
670 
671  if self.protein_names is None:
672  self.protein_names = protein_names
673  else:
674  if self.protein_names != protein_names:
675  print("Error: the protein names of the new coordinate set "
676  "is not compatible with the previous one")
677 
678  if self.len_particles_resolution_one is None:
679  self.len_particles_resolution_one = len(particles_resolution_one)
680  else:
681  if self.len_particles_resolution_one != \
682  len(particles_resolution_one):
683  raise ValueError("the new coordinate set is not compatible "
684  "with the previous one")
685 
686  return particles_resolution_one
687 
688  def add_structure(self,
689  rmf_name,
690  rmf_frame_index,
691  structure_set_name,
692  setup_index_map=False):
693  """ Read a structure into the ensemble and store (as coordinates).
694  @param rmf_name The name of the RMF file
695  @param rmf_frame_index The frame to read
696  @param structure_set_name Name for the set that includes this structure
697  (e.g. "cluster 1")
698  @param setup_index_map if requested, set up a dictionary to help
699  find residue indexes
700  """
701 
702  # decide where to put this structure
703  if structure_set_name in self.structures_dictionary:
704  cdict = self.structures_dictionary[structure_set_name]
705  rmflist = self.rmf_names_frames[structure_set_name]
706  else:
707  self.structures_dictionary[structure_set_name] = {}
708  self.rmf_names_frames[structure_set_name] = []
709  cdict = self.structures_dictionary[structure_set_name]
710  rmflist = self.rmf_names_frames[structure_set_name]
711 
712  # read the particles
713  try:
714  particles_resolution_one = self._get_structure(
715  rmf_frame_index, rmf_name)
716  except ValueError:
717  print("something wrong with the rmf")
718  return 0
719  self.selection_dictionary.update({"All": self.protein_names})
720 
721  for selection_name in self.selection_dictionary:
722  selection_tuple = self.selection_dictionary[selection_name]
723  coords = self._select_coordinates(
724  selection_tuple, particles_resolution_one, self.prots[0])
725  if selection_name not in cdict:
726  cdict[selection_name] = [coords]
727  else:
728  cdict[selection_name].append(coords)
729 
730  rmflist.append((rmf_name, rmf_frame_index))
731 
732  # if requested, set up a dictionary to help find residue indexes
733  if setup_index_map:
734  self.residue_particle_index_map = {}
735  for prot_name in self.protein_names:
736  self.residue_particle_index_map[prot_name] = \
737  self._get_residue_particle_index_map(
738  prot_name, particles_resolution_one, self.prots[0])
739 
740  def add_structures(self,
741  rmf_name_frame_tuples,
742  structure_set_name):
743  """Read a list of RMFs, supports parallel
744  @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
745  @param structure_set_name Name this set of structures
746  (e.g. "cluster.1")
747  """
748 
749  # split up the requested list to read in parallel
750  my_rmf_name_frame_tuples = IMP.pmi.tools.chunk_list_into_segments(
751  rmf_name_frame_tuples, self.number_of_processes)[self.rank]
752  for nfr, tup in enumerate(my_rmf_name_frame_tuples):
753  rmf_name = tup[0]
754  rmf_frame_index = tup[1]
755  # the first frame stores the map between residues and particles
756  if self.residue_particle_index_map is None:
757  setup_index_map = True
758  else:
759  setup_index_map = False
760  self.add_structure(rmf_name,
761  rmf_frame_index,
762  structure_set_name,
763  setup_index_map)
764 
765  # synchronize the structures
766  if self.number_of_processes > 1:
767  self.rmf_names_frames = IMP.pmi.tools.scatter_and_gather(
768  self.rmf_names_frames)
769  if self.rank != 0:
770  self.comm.send(self.structures_dictionary, dest=0, tag=11)
771  elif self.rank == 0:
772  for i in range(1, self.number_of_processes):
773  data_tmp = self.comm.recv(source=i, tag=11)
774  for key in self.structures_dictionary:
775  self.structures_dictionary[key].update(data_tmp[key])
776  for i in range(1, self.number_of_processes):
777  self.comm.send(self.structures_dictionary, dest=i, tag=11)
778  if self.rank != 0:
779  self.structures_dictionary = self.comm.recv(source=0, tag=11)
780 
781  def _get_residue_particle_index_map(self, prot_name, structure, hier):
782  # Creates map from all particles to residue numbers
783  residue_particle_index_map = []
784  s = IMP.atom.Selection(hier, molecules=[prot_name], resolution=1)
785  all_selected_particles = s.get_selected_particles()
786  intersection = list(set(all_selected_particles) & set(structure))
787  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
788  for p in sorted_intersection:
789  residue_particle_index_map.append(
791  return residue_particle_index_map
792 
793  def _select_coordinates(self, tuple_selections, structure, prot):
794  selected_coordinates = []
795  for t in tuple_selections:
796  if isinstance(t, tuple) and len(t) == 3:
797  s = IMP.atom.Selection(
798  prot, molecules=[t[2]],
799  residue_indexes=range(t[0], t[1]+1), resolution=1)
800  all_selected_particles = s.get_selected_particles()
801  intersection = list(set(all_selected_particles)
802  & set(structure))
803  sorted_intersection = \
804  IMP.pmi.tools.sort_by_residues(intersection)
805  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
806  for p in sorted_intersection]
807  selected_coordinates += cc
808  elif isinstance(t, str):
809  s = IMP.atom.Selection(prot, molecules=[t], resolution=1)
810  all_selected_particles = s.get_selected_particles()
811  intersection = list(set(all_selected_particles)
812  & set(structure))
813  sorted_intersection = \
814  IMP.pmi.tools.sort_by_residues(intersection)
815  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
816  for p in sorted_intersection]
817  selected_coordinates += cc
818  else:
819  raise ValueError("Selection error")
820  return selected_coordinates
821 
822  def set_threshold(self, threshold):
823  self.threshold = threshold
824 
825  def _get_distance(self, structure_set_name1, structure_set_name2,
826  selection_name, index1, index2):
827  """Compute distance between structures with various metrics"""
828  c1 = self.structures_dictionary[
829  structure_set_name1][selection_name][index1]
830  c2 = self.structures_dictionary[
831  structure_set_name2][selection_name][index2]
832  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
833  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
834 
835  if self.style == 'pairwise_drmsd_k':
836  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
837  if self.style == 'pairwise_drms_k':
838  distance = IMP.atom.get_drms(coordinates1, coordinates2)
839  if self.style == 'pairwise_drmsd_Q':
840  distance = IMP.atom.get_drmsd_Q(coordinates1, coordinates2,
841  self.threshold)
842 
843  if self.style == 'pairwise_rmsd':
844  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
845  return distance
846 
847  def _get_particle_distances(self, structure_set_name1, structure_set_name2,
848  selection_name, index1, index2):
849  c1 = self.structures_dictionary[
850  structure_set_name1][selection_name][index1]
851  c2 = self.structures_dictionary[
852  structure_set_name2][selection_name][index2]
853 
854  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
855  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
856 
857  distances = [np.linalg.norm(a-b)
858  for (a, b) in zip(coordinates1, coordinates2)]
859 
860  return distances
861 
862  def get_precision(self, structure_set_name1, structure_set_name2,
863  outfile=None, skip=1, selection_keywords=None):
864  """ Evaluate the precision of two named structure groups. Supports MPI.
865  When the structure_set_name1 is different from the structure_set_name2,
866  this evaluates the cross-precision (average pairwise distances).
867  @param outfile Name of the precision output file
868  @param structure_set_name1 string name of the first structure set
869  @param structure_set_name2 string name of the second structure set
870  @param skip analyze every (skip) structure for the distance
871  matrix calculation
872  @param selection_keywords Specify the selection name you want to
873  calculate on. By default this is computed for everything
874  you provided in the constructor, plus all the subunits together.
875  """
876  if selection_keywords is None:
877  sel_keys = list(self.selection_dictionary.keys())
878  else:
879  for k in selection_keywords:
880  if k not in self.selection_dictionary:
881  raise KeyError(
882  "you are trying to find named selection "
883  + k + " which was not requested in the constructor")
884  sel_keys = selection_keywords
885 
886  if outfile is not None:
887  of = open(outfile, "w")
888  centroid_index = 0
889  for selection_name in sel_keys:
890  number_of_structures_1 = len(self.structures_dictionary[
891  structure_set_name1][selection_name])
892  number_of_structures_2 = len(self.structures_dictionary[
893  structure_set_name2][selection_name])
894  distances = {}
895  structure_pointers_1 = list(range(0, number_of_structures_1, skip))
896  structure_pointers_2 = list(range(0, number_of_structures_2, skip))
897  pair_combination_list = list(
898  itertools.product(structure_pointers_1, structure_pointers_2))
899  if len(pair_combination_list) == 0:
900  raise ValueError("no structure selected. Check the "
901  "skip parameter.")
902 
903  # compute pairwise distances in parallel
904  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
905  pair_combination_list, self.number_of_processes)[self.rank]
906  for n, pair in enumerate(my_pair_combination_list):
907  distances[pair] = self._get_distance(
908  structure_set_name1, structure_set_name2, selection_name,
909  pair[0], pair[1])
910  if self.number_of_processes > 1:
911  distances = IMP.pmi.tools.scatter_and_gather(distances)
912 
913  # Finally compute distance to centroid
914  if self.rank == 0:
915  if structure_set_name1 == structure_set_name2:
916  structure_pointers = structure_pointers_1
917  number_of_structures = number_of_structures_1
918 
919  # calculate the distance from the first centroid
920  # and determine the centroid
921  distance = 0.0
922  distances_to_structure = {}
923  distances_to_structure_normalization = {}
924 
925  for n in structure_pointers:
926  distances_to_structure[n] = 0.0
927  distances_to_structure_normalization[n] = 0
928 
929  for k in distances:
930  distance += distances[k]
931  distances_to_structure[k[0]] += distances[k]
932  distances_to_structure[k[1]] += distances[k]
933  distances_to_structure_normalization[k[0]] += 1
934  distances_to_structure_normalization[k[1]] += 1
935 
936  for n in structure_pointers:
937  distances_to_structure[n] = \
938  distances_to_structure[n] \
939  / distances_to_structure_normalization[n]
940 
941  min_distance = min([distances_to_structure[n]
942  for n in distances_to_structure])
943  centroid_index = [k for k, v in
944  distances_to_structure.items()
945  if v == min_distance][0]
946  centroid_rmf_name = \
947  self.rmf_names_frames[
948  structure_set_name1][centroid_index]
949 
950  centroid_distance = 0.0
951  distance_list = []
952  for n in range(number_of_structures):
953  dist = self._get_distance(
954  structure_set_name1, structure_set_name1,
955  selection_name, centroid_index, n)
956  centroid_distance += dist
957  distance_list.append(dist)
958 
959  centroid_distance /= number_of_structures
960  if outfile is not None:
961  of.write(str(selection_name) + " " +
962  structure_set_name1 +
963  " average centroid distance " +
964  str(centroid_distance) + "\n")
965  of.write(str(selection_name) + " " +
966  structure_set_name1 + " centroid index " +
967  str(centroid_index) + "\n")
968  of.write(str(selection_name) + " " +
969  structure_set_name1 + " centroid rmf name " +
970  str(centroid_rmf_name) + "\n")
971  of.write(str(selection_name) + " " +
972  structure_set_name1 +
973  " median centroid distance " +
974  str(np.median(distance_list)) + "\n")
975 
976  average_pairwise_distances = sum(
977  distances.values())/len(list(distances.values()))
978  if outfile is not None:
979  of.write(str(selection_name) + " " + structure_set_name1 +
980  " " + structure_set_name2 +
981  " average pairwise distance " +
982  str(average_pairwise_distances) + "\n")
983  if outfile is not None:
984  of.close()
985  return centroid_index
986 
987  def get_rmsf(self, structure_set_name, outdir="./", skip=1,
988  set_plot_yaxis_range=None):
989  """ Calculate the residue mean square fluctuations (RMSF).
990  Automatically outputs as data file and pdf
991  @param structure_set_name Which structure set to calculate RMSF for
992  @param outdir Where to write the files
993  @param skip Skip this number of structures
994  @param set_plot_yaxis_range In case you need to change the plot
995  """
996  # get the centroid structure for the whole complex
997  centroid_index = self.get_precision(structure_set_name,
998  structure_set_name,
999  outfile=None,
1000  skip=skip)
1001  if self.rank == 0:
1002  for sel_name in self.protein_names:
1003  self.selection_dictionary.update({sel_name: [sel_name]})
1004  try:
1005  number_of_structures = \
1006  len(self.structures_dictionary[
1007  structure_set_name][sel_name])
1008  except KeyError:
1009  # that protein was not included in the selection
1010  continue
1011  rpim = self.residue_particle_index_map[sel_name]
1012  outfile = outdir+"/rmsf."+sel_name+".dat"
1013  of = open(outfile, "w")
1014  residue_distances = {}
1015  residue_nblock = {}
1016  for index in range(number_of_structures):
1017  distances = self._get_particle_distances(
1018  structure_set_name, structure_set_name, sel_name,
1019  centroid_index, index)
1020  for nblock, block in enumerate(rpim):
1021  for residue_number in block:
1022  residue_nblock[residue_number] = nblock
1023  if residue_number not in residue_distances:
1024  residue_distances[residue_number] = \
1025  [distances[nblock]]
1026  else:
1027  residue_distances[
1028  residue_number].append(distances[nblock])
1029 
1030  residues = []
1031  rmsfs = []
1032  for rn in residue_distances:
1033  residues.append(rn)
1034  rmsf = np.std(residue_distances[rn])
1035  rmsfs.append(rmsf)
1036  of.write(str(rn) + " " + str(residue_nblock[rn])
1037  + " " + str(rmsf) + "\n")
1038 
1039  IMP.pmi.output.plot_xy_data(
1040  residues, rmsfs, title=sel_name,
1041  out_fn=outdir+"/rmsf."+sel_name, display=False,
1042  set_plot_yaxis_range=set_plot_yaxis_range,
1043  xlabel='Residue Number', ylabel='Standard error')
1044  of.close()
1045 
1046  def set_reference_structure(self, rmf_name, rmf_frame_index):
1047  """Read in a structure used for reference computation.
1048  Needed before calling get_average_distance_wrt_reference_structure()
1049  @param rmf_name The RMF file to read the reference
1050  @param rmf_frame_index The index in that file
1051  """
1052  particles_resolution_one = self._get_structure(rmf_frame_index,
1053  rmf_name)
1054  self.reference_rmf_names_frames = (rmf_name, rmf_frame_index)
1055 
1056  for selection_name in self.selection_dictionary:
1057  selection_tuple = self.selection_dictionary[selection_name]
1058  coords = self._select_coordinates(
1059  selection_tuple, particles_resolution_one, self.prots[0])
1060  self.reference_structures_dictionary[selection_name] = coords
1061 
1063  self, structure_set_name, alignment_selection_key):
1064  """First align then calculate RMSD
1065  @param structure_set_name: the name of the structure set
1066  @param alignment_selection_key: the key containing the selection tuples
1067  needed to make the alignment stored in self.selection_dictionary
1068  @return: for each structure in the structure set, returns the rmsd
1069  """
1070  ret = {}
1071  if self.reference_structures_dictionary == {}:
1072  print("Cannot compute until you set a reference structure")
1073  return
1074 
1075  align_reference_coordinates = \
1076  self.reference_structures_dictionary[alignment_selection_key]
1077  align_coordinates = self.structures_dictionary[
1078  structure_set_name][alignment_selection_key]
1079  transformations = []
1080  for c in align_coordinates:
1082  {"All": align_reference_coordinates}, {"All": c})
1083  transformation = Ali.align()[1]
1084  transformations.append(transformation)
1085  for selection_name in self.selection_dictionary:
1086  reference_coordinates = \
1087  self.reference_structures_dictionary[selection_name]
1088  coordinates2 = [IMP.algebra.Vector3D(c)
1089  for c in reference_coordinates]
1090  distances = []
1091  for n, sc in enumerate(self.structures_dictionary[
1092  structure_set_name][selection_name]):
1093  coordinates1 = [
1094  transformations[n].get_transformed(IMP.algebra.Vector3D(c))
1095  for c in sc]
1096  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1097  distances.append(distance)
1098  ret[selection_name] = {
1099  'all_distances': distances,
1100  'average_distance': sum(distances)/len(distances),
1101  'minimum_distance': min(distances)}
1102  return ret
1103 
1104  def _get_median(self, list_of_values):
1105  return np.median(np.array(list_of_values))
1106 
1107  def get_average_distance_wrt_reference_structure(self, structure_set_name):
1108  """Compare the structure set to the reference structure.
1109  @param structure_set_name The structure set to compute this on
1110  @note First call set_reference_structure()
1111  """
1112  ret = {}
1113  if self.reference_structures_dictionary == {}:
1114  print("Cannot compute until you set a reference structure")
1115  return
1116  for selection_name in self.selection_dictionary:
1117  reference_coordinates = self.reference_structures_dictionary[
1118  selection_name]
1119  coordinates2 = [IMP.algebra.Vector3D(c)
1120  for c in reference_coordinates]
1121  distances = []
1122  for sc in self.structures_dictionary[
1123  structure_set_name][selection_name]:
1124  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
1125  if self.style == 'pairwise_drmsd_k':
1126  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
1127  if self.style == 'pairwise_drms_k':
1128  distance = IMP.atom.get_drms(coordinates1, coordinates2)
1129  if self.style == 'pairwise_drmsd_Q':
1130  distance = IMP.atom.get_drmsd_Q(
1131  coordinates1, coordinates2, self.threshold)
1132  if self.style == 'pairwise_rmsd':
1133  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1134  distances.append(distance)
1135 
1136  print(selection_name, "average distance",
1137  sum(distances)/len(distances), "minimum distance",
1138  min(distances), 'nframes', len(distances))
1139  ret[selection_name] = {
1140  'average_distance': sum(distances)/len(distances),
1141  'minimum_distance': min(distances)}
1142  return ret
1143 
1144  def get_coordinates(self):
1145  pass
1146 
1147  def set_precision_style(self, style):
1148  if style in self.styles:
1149  self.style = style
1150  else:
1151  raise ValueError("No such style")
1152 
1153 
1155  """Compute mean density maps from structures.
1156 
1157  Keeps a dictionary of density maps,
1158  keys are in the custom ranges. When you call add_subunits_density, it adds
1159  particle coordinates to the existing density maps.
1160  """
1161 
1162  def __init__(self, custom_ranges, representation=None, resolution=20.0,
1163  voxel=5.0):
1164  """Constructor.
1165  @param custom_ranges Required. It's a dictionary, keys are the
1166  density component names, values are selection tuples
1167  e.g. {'kin28':[['kin28',1,-1]],
1168  'density_name_1' :[('ccl1')],
1169  'density_name_2' :[(1,142,'tfb3d1'),
1170  (143,700,'tfb3d2')],
1171  @param resolution The MRC resolution of the output map (in
1172  Angstrom unit)
1173  @param voxel The voxel size for the output map (lower is slower)
1174  """
1175 
1176  if representation:
1178  "The 'representation' argument is deprecated "
1179  "(and is ignored). Pass hierarchies to "
1180  "add_subunits_density() instead.")
1181  self.MRCresolution = resolution
1182  self.voxel = voxel
1183  self.densities = {}
1184  self.count_models = 0.0
1185  self.custom_ranges = custom_ranges
1186 
1187  def add_subunits_density(self, hierarchy):
1188  """Add a frame to the densities.
1189  @param hierarchy The hierarchy to add.
1190  """
1191  self.count_models += 1.0
1192 
1193  part_dict = get_particles_at_resolution_one(hierarchy)
1194  all_particles_by_resolution = []
1195  for name in part_dict:
1196  all_particles_by_resolution += part_dict[name]
1197 
1198  for density_name in self.custom_ranges:
1199  parts = []
1200  all_particles_by_segments = []
1201 
1202  for seg in self.custom_ranges[density_name]:
1203  if isinstance(seg, str):
1204  s = IMP.atom.Selection(hierarchy, molecule=seg)
1205  elif isinstance(seg, tuple) and len(seg) == 2:
1206  s = IMP.atom.Selection(
1207  hierarchy, molecule=seg[0], copy_index=seg[1])
1208  elif isinstance(seg, tuple) and len(seg) == 3:
1209  s = IMP.atom.Selection(
1210  hierarchy, molecule=seg[2],
1211  residue_indexes=range(seg[0], seg[1] + 1))
1212  elif isinstance(seg, tuple) and len(seg) == 4:
1213  s = IMP.atom.Selection(
1214  hierarchy, molecule=seg[2],
1215  residue_indexes=range(seg[0], seg[1] + 1),
1216  copy_index=seg[3])
1217  else:
1218  raise Exception('could not understand selection tuple '
1219  + str(seg))
1220 
1221  all_particles_by_segments += s.get_selected_particles()
1222  parts = all_particles_by_segments
1223  self._create_density_from_particles(parts, density_name)
1224 
1225  def normalize_density(self):
1226  pass
1227 
1228  def _create_density_from_particles(self, ps, name,
1229  kernel_type='GAUSSIAN'):
1230  '''Internal function for adding to densities.
1231  pass XYZR particles with mass and create a density from them.
1232  kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.'''
1233  dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
1234  dmap.calcRMS()
1235  dmap.set_was_used(True)
1236  if name not in self.densities:
1237  self.densities[name] = dmap
1238  else:
1239  bbox1 = IMP.em.get_bounding_box(self.densities[name])
1240  bbox2 = IMP.em.get_bounding_box(dmap)
1241  bbox1 += bbox2
1242  dmap3 = IMP.em.create_density_map(bbox1, self.voxel)
1243  dmap3.set_was_used(True)
1244  dmap3.add(dmap)
1245  dmap3.add(self.densities[name])
1246  self.densities[name] = dmap3
1247 
1248  def get_density_keys(self):
1249  return list(self.densities.keys())
1250 
1251  def get_density(self, name):
1252  """Get the current density for some component name"""
1253  if name not in self.densities:
1254  return None
1255  else:
1256  return self.densities[name]
1257 
1258  def write_mrc(self, path="./", suffix=None):
1259  import os
1260  import errno
1261  for density_name in self.densities:
1262  self.densities[density_name].multiply(1. / self.count_models)
1263  if suffix is None:
1264  name = path + "/" + density_name + ".mrc"
1265  else:
1266  name = path + "/" + density_name + "." + suffix + ".mrc"
1267  path, file = os.path.split(name)
1268  if not os.path.exists(path):
1269  try:
1270  os.makedirs(path)
1271  except OSError as e:
1272  if e.errno != errno.EEXIST:
1273  raise
1274  IMP.em.write_map(self.densities[density_name], name,
1276 
1277 
1278 class GetContactMap:
1279 
1280  def __init__(self, distance=15.):
1281  self.distance = distance
1282  self.contactmap = ''
1283  self.namelist = []
1284  self.xlinks = 0
1285  self.XL = {}
1286  self.expanded = {}
1287  self.resmap = {}
1288 
1289  def set_prot(self, prot):
1290  self.prot = prot
1291  self.protnames = []
1292 
1293  particles_dictionary = get_particles_at_resolution_one(self.prot)
1294 
1295  for name in particles_dictionary:
1296  residue_indexes = []
1297  for p in particles_dictionary[name]:
1298  print(p.get_name())
1299  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1300 
1301  if len(residue_indexes) != 0:
1302  self.protnames.append(name)
1303 
1304  def get_subunit_coords(self, frame, align=0):
1305  from scipy.spatial.distance import cdist
1306  coords = []
1307  radii = []
1308  namelist = []
1309  for part in self.prot.get_children():
1310  SortedSegments = []
1311  print(part)
1312  for chl in part.get_children():
1313  start = IMP.atom.get_leaves(chl)[0]
1314 
1315  startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
1316  SortedSegments.append((chl, startres))
1317  SortedSegments = sorted(SortedSegments, key=itemgetter(1))
1318 
1319  for sgmnt in SortedSegments:
1320  for leaf in IMP.atom.get_leaves(sgmnt[0]):
1321  p = IMP.core.XYZR(leaf)
1322  crd = np.array([p.get_x(), p.get_y(), p.get_z()])
1323 
1324  coords.append(crd)
1325  radii.append(p.get_radius())
1326 
1327  new_name = part.get_name() + '_' + sgmnt[0].get_name() + \
1328  '_' + \
1329  str(IMP.atom.Fragment(leaf)
1330  .get_residue_indexes()[0])
1331  namelist.append(new_name)
1332  self.expanded[new_name] = len(
1333  IMP.atom.Fragment(leaf).get_residue_indexes())
1334  if part.get_name() not in self.resmap:
1335  self.resmap[part.get_name()] = {}
1336  for res in IMP.atom.Fragment(leaf).get_residue_indexes():
1337  self.resmap[part.get_name()][res] = new_name
1338 
1339  coords = np.array(coords)
1340  radii = np.array(radii)
1341  if len(self.namelist) == 0:
1342  self.namelist = namelist
1343  self.contactmap = np.zeros((len(coords), len(coords)))
1344  distances = cdist(coords, coords)
1345  distances = (distances - radii).T - radii
1346  distances = distances <= self.distance
1347  self.contactmap += distances
1348 
1349  def add_xlinks(self, filename,
1350  identification_string='ISDCrossLinkMS_Distance_'):
1351  self.xlinks = 1
1352  with open(filename) as data:
1353  D = data.readlines()
1354 
1355  for d in D:
1356  if identification_string in d:
1357  d = d.replace("_", " ").replace("-", " ").replace(
1358  ":", " ").split()
1359 
1360  t1, t2 = (d[0], d[1]), (d[1], d[0])
1361  if t1 not in self.XL:
1362  self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
1363  self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
1364  else:
1365  self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
1366  self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1367 
1368  def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
1369  K = self.namelist
1370  M = self.contactmap
1371  C, R = [], []
1372  proteins = self.protnames
1373 
1374  # exp new
1375  if skip_cmap == 0:
1376  Matrices = {}
1377  proteins = [p.get_name() for p in self.prot.get_children()]
1378  missing = []
1379  for p1 in range(len(proteins)):
1380  for p2 in range(p1, len(proteins)):
1381  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1382  max(self.resmap[proteins[p2]].keys()))
1383  pn1, pn2 = proteins[p1], proteins[p2]
1384  mtr = np.zeros((pl1 + 1, pl2 + 1))
1385  print('Creating matrix for: ',
1386  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1387  for i1 in range(1, pl1 + 1):
1388  for i2 in range(1, pl2 + 1):
1389  try:
1390  r1 = K.index(self.resmap[pn1][i1])
1391  r2 = K.index(self.resmap[pn2][i2])
1392  r = M[r1, r2]
1393  mtr[i1 - 1, i2 - 1] = r
1394  except KeyError:
1395  missing.append((pn1, pn2, i1, i2))
1396  Matrices[(pn1, pn2)] = mtr
1397 
1398  # add cross-links
1399  if skip_xl == 0:
1400  if self.XL == {}:
1401  raise ValueError("cross-links were not provided, use "
1402  "add_xlinks function!")
1403  Matrices_xl = {}
1404  for p1 in range(len(proteins)):
1405  for p2 in range(p1, len(proteins)):
1406  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1407  max(self.resmap[proteins[p2]].keys()))
1408  pn1, pn2 = proteins[p1], proteins[p2]
1409  mtr = np.zeros((pl1 + 1, pl2 + 1))
1410  flg = 0
1411  try:
1412  xls = self.XL[(pn1, pn2)]
1413  except KeyError:
1414  try:
1415  xls = self.XL[(pn2, pn1)]
1416  flg = 1
1417  except KeyError:
1418  flg = 2
1419  if flg == 0:
1420  print('Creating matrix for: ',
1421  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1422  for xl1, xl2 in xls:
1423  if xl1 > pl1:
1424  print('X' * 10, xl1, xl2)
1425  xl1 = pl1
1426  if xl2 > pl2:
1427  print('X' * 10, xl1, xl2)
1428  xl2 = pl2
1429  mtr[xl1 - 1, xl2 - 1] = 100
1430  elif flg == 1:
1431  print('Creating matrix for: ',
1432  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1433  for xl1, xl2 in xls:
1434  if xl1 > pl1:
1435  print('X' * 10, xl1, xl2)
1436  xl1 = pl1
1437  if xl2 > pl2:
1438  print('X' * 10, xl1, xl2)
1439  xl2 = pl2
1440  mtr[xl2 - 1, xl1 - 1] = 100
1441  else:
1442  print('No cross-links between: ', pn1, pn2)
1443  Matrices_xl[(pn1, pn2)] = mtr
1444 
1445  # make list of protein names and create coordinate lists
1446  C = proteins
1447  # W is the component length list,
1448  # R is the contiguous coordinates list
1449  W, R = [], []
1450  for i, c in enumerate(C):
1451  cl = max(self.resmap[c].keys())
1452  W.append(cl)
1453  if i == 0:
1454  R.append(cl)
1455  else:
1456  R.append(R[-1] + cl)
1457 
1458  # start plotting
1459  if outname:
1460  # Don't require a display
1461  import matplotlib as mpl
1462  mpl.use('Agg')
1463  import matplotlib.pyplot as plt
1464  import matplotlib.gridspec as gridspec
1465  import scipy.sparse as sparse
1466 
1467  plt.figure()
1468  gs = gridspec.GridSpec(len(W), len(W),
1469  width_ratios=W,
1470  height_ratios=W)
1471 
1472  cnt = 0
1473  for x1, r1 in enumerate(R):
1474  for x2, r2 in enumerate(R):
1475  ax = plt.subplot(gs[cnt])
1476  if skip_cmap == 0:
1477  try:
1478  mtr = Matrices[(C[x1], C[x2])]
1479  except KeyError:
1480  mtr = Matrices[(C[x2], C[x1])].T
1481  ax.imshow(
1482  np.log(mtr),
1483  interpolation='nearest',
1484  vmin=0.,
1485  vmax=log(mtr.max()) if mtr.max() > 0. else 1.)
1486  ax.set_xticks([])
1487  ax.set_yticks([])
1488  if skip_xl == 0:
1489  try:
1490  mtr = Matrices_xl[(C[x1], C[x2])]
1491  except KeyError:
1492  mtr = Matrices_xl[(C[x2], C[x1])].T
1493  ax.spy(
1494  sparse.csr_matrix(mtr),
1495  markersize=10,
1496  color='white',
1497  linewidth=100,
1498  alpha=0.5)
1499  ax.set_xticks([])
1500  ax.set_yticks([])
1501 
1502  cnt += 1
1503  if x2 == 0:
1504  ax.set_ylabel(C[x1], rotation=90)
1505  if outname:
1506  plt.savefig(outname + ".pdf", dpi=300, transparent="False")
1507  else:
1508  plt.show()
1509 
1510 
1511 # ------------------------------------------------------------------
1512 # a few random tools
1513 
1514 def get_hiers_from_rmf(model, frame_number, rmf_file):
1515  # I have to deprecate this function
1516  print("getting coordinates for frame %i rmf file %s"
1517  % (frame_number, rmf_file))
1518 
1519  # load the frame
1520  rh = RMF.open_rmf_file_read_only(rmf_file)
1521 
1522  try:
1523  prots = IMP.rmf.create_hierarchies(rh, model)
1524  except IOError:
1525  print("Unable to open rmf file %s" % (rmf_file))
1526  return None
1527 
1528  try:
1529  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1530  except IOError:
1531  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1532  return None
1533  model.update()
1534  del rh
1535 
1536  return prots
1537 
1538 
1539 def link_hiers_to_rmf(model, hiers, frame_number, rmf_file):
1540  print("linking hierarchies for frame %i rmf file %s"
1541  % (frame_number, rmf_file))
1542  rh = RMF.open_rmf_file_read_only(rmf_file)
1543  IMP.rmf.link_hierarchies(rh, hiers)
1544  try:
1545  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1546  except: # noqa: E722
1547  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1548  return False
1549  model.update()
1550  del rh
1551  return True
1552 
1553 
1554 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
1555  # I have to deprecate this function
1556  print("getting coordinates for frame %i rmf file %s"
1557  % (frame_number, rmf_file))
1558 
1559  # load the frame
1560  rh = RMF.open_rmf_file_read_only(rmf_file)
1561 
1562  try:
1563  prots = IMP.rmf.create_hierarchies(rh, model)
1564  rs = IMP.rmf.create_restraints(rh, model)
1565  except IOError:
1566  print("Unable to open rmf file %s" % (rmf_file))
1567  return None, None
1568  try:
1569  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1570  except: # noqa: E722
1571  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1572  return None, None
1573  model.update()
1574  del rh
1575 
1576  return prots, rs
1577 
1578 
1579 def link_hiers_and_restraints_to_rmf(model, hiers, rs, frame_number, rmf_file):
1580  print("linking hierarchies for frame %i rmf file %s"
1581  % (frame_number, rmf_file))
1582  rh = RMF.open_rmf_file_read_only(rmf_file)
1583  IMP.rmf.link_hierarchies(rh, hiers)
1584  IMP.rmf.link_restraints(rh, rs)
1585  try:
1586  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1587  except: # noqa: E722
1588  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1589  return False
1590  model.update()
1591  del rh
1592  return True
1593 
1594 
1596  """Get particles at res 1, or any beads, based on the name.
1597  No Representation is needed. This is mainly used when the hierarchy
1598  is read from an RMF file.
1599 
1600  @return a dictionary of component names and their particles
1601  @note If the root node is named "System" or is a "State", do
1602  proper selection.
1603  """
1604  particle_dict = {}
1605 
1606  # attempt to give good results for PMI2
1607  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1608  sel = IMP.atom.Selection(mol, resolution=1)
1609  particle_dict[mol.get_name()] = sel.get_selected_particles()
1610  return particle_dict
1611 
1612 
1614  """Get particles at res 10, or any beads, based on the name.
1615  No Representation is needed.
1616  This is mainly used when the hierarchy is read from an RMF file.
1617 
1618  @return a dictionary of component names and their particles
1619  @note If the root node is named "System" or is a "State", do proper
1620  selection.
1621  """
1622  particle_dict = {}
1623  # attempt to give good results for PMI2
1624  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1625  sel = IMP.atom.Selection(mol, resolution=10)
1626  particle_dict[mol.get_name()] = sel.get_selected_particles()
1627  return particle_dict
1628 
1629 
1631  """Visualization of cross-links"""
1632  def __init__(self):
1633  self.crosslinks = []
1634  self.external_csv_data = None
1635  self.crosslinkedprots = set()
1636  self.mindist = +10000000.0
1637  self.maxdist = -10000000.0
1638  self.contactmap = None
1639 
1640  def set_hierarchy(self, prot):
1641  self.prot_length_dict = {}
1642  self.model = prot.get_model()
1643 
1644  for i in prot.get_children():
1645  name = i.get_name()
1646  residue_indexes = []
1647  for p in IMP.atom.get_leaves(i):
1648  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1649 
1650  if len(residue_indexes) != 0:
1651  self.prot_length_dict[name] = max(residue_indexes)
1652 
1653  def set_coordinates_for_contact_map(self, rmf_name, rmf_frame_index):
1654  from scipy.spatial.distance import cdist
1655 
1656  rh = RMF.open_rmf_file_read_only(rmf_name)
1657  prots = IMP.rmf.create_hierarchies(rh, self.model)
1658  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
1659  print("getting coordinates for frame %i rmf file %s"
1660  % (rmf_frame_index, rmf_name))
1661  del rh
1662 
1663  coords = []
1664  radii = []
1665 
1666  particles_dictionary = get_particles_at_resolution_one(prots[0])
1667 
1668  resindex = 0
1669  self.index_dictionary = {}
1670 
1671  for name in particles_dictionary:
1672  residue_indexes = []
1673  for p in particles_dictionary[name]:
1674  print(p.get_name())
1675  residue_indexes = IMP.pmi.tools.get_residue_indexes(p)
1676 
1677  if len(residue_indexes) != 0:
1678 
1679  for res in range(min(residue_indexes),
1680  max(residue_indexes) + 1):
1681  d = IMP.core.XYZR(p)
1682 
1683  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1684  coords.append(crd)
1685  radii.append(d.get_radius())
1686  if name not in self.index_dictionary:
1687  self.index_dictionary[name] = [resindex]
1688  else:
1689  self.index_dictionary[name].append(resindex)
1690  resindex += 1
1691 
1692  coords = np.array(coords)
1693  radii = np.array(radii)
1694 
1695  distances = cdist(coords, coords)
1696  distances = (distances - radii).T - radii
1697 
1698  distances = np.where(distances <= 20.0, 1.0, 0)
1699  if self.contactmap is None:
1700  self.contactmap = np.zeros((len(coords), len(coords)))
1701  self.contactmap += distances
1702 
1703  for prot in prots:
1704  IMP.atom.destroy(prot)
1705 
1706  def set_crosslinks(
1707  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1708  mapping=None, filter_label=None,
1709  # provide a list of rmf base names to filter the stat file
1710  filter_rmf_file_names=None, external_csv_data_file=None,
1711  external_csv_data_file_unique_id_key="Unique ID"):
1712 
1713  # example key ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None # noqa: E501
1714  # mapping is a dictionary that maps standard keywords to entry
1715  # positions in the key string
1716  # confidence class is a filter that
1717  # external datafile is a datafile that contains further information
1718  # on the cross-links
1719  # it will use the unique id to create the dictionary keys
1720 
1721  po = IMP.pmi.output.ProcessOutput(data_file)
1722  keys = po.get_keys()
1723 
1724  xl_keys = [k for k in keys if search_label in k]
1725 
1726  if filter_rmf_file_names is not None:
1727  rmf_file_key = "local_rmf_file_name"
1728  fs = po.get_fields(xl_keys+[rmf_file_key])
1729  else:
1730  fs = po.get_fields(xl_keys)
1731 
1732  # this dictionary stores the occurency of given cross-links
1733  self.cross_link_frequency = {}
1734 
1735  # this dictionary stores the series of distances for given cross-linked
1736  # residues
1737  self.cross_link_distances = {}
1738 
1739  # this dictionary stores the series of distances for given cross-linked
1740  # residues
1741  self.cross_link_distances_unique = {}
1742 
1743  if external_csv_data_file is not None:
1744  # this dictionary stores the further information on cross-links
1745  # labeled by unique ID
1746  self.external_csv_data = {}
1747  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1748 
1749  for xl in xldb:
1750  self.external_csv_data[
1751  xl[external_csv_data_file_unique_id_key]] = xl
1752 
1753  # this list keeps track the tuple of cross-links and sample
1754  # so that we don't count twice the same cross-linked residues in the
1755  # same sample
1756  cross_link_frequency_list = []
1757 
1758  self.unique_cross_link_list = []
1759 
1760  for key in xl_keys:
1761  print(key)
1762  keysplit = key.replace("_", " ").replace("-", " ").replace(
1763  ":", " ").split()
1764 
1765  if filter_label is not None:
1766  if filter_label not in keysplit:
1767  continue
1768 
1769  if mapping is None:
1770  r1 = int(keysplit[5])
1771  c1 = keysplit[6]
1772  r2 = int(keysplit[7])
1773  c2 = keysplit[8]
1774  try:
1775  confidence = keysplit[12]
1776  except: # noqa: E722
1777  confidence = '0.0'
1778  try:
1779  unique_identifier = keysplit[3]
1780  except: # noqa: E722
1781  unique_identifier = '0'
1782  else:
1783  r1 = int(keysplit[mapping["Residue1"]])
1784  c1 = keysplit[mapping["Protein1"]]
1785  r2 = int(keysplit[mapping["Residue2"]])
1786  c2 = keysplit[mapping["Protein2"]]
1787  try:
1788  confidence = keysplit[mapping["Confidence"]]
1789  except: # noqa: E722
1790  confidence = '0.0'
1791  try:
1792  unique_identifier = keysplit[mapping["Unique Identifier"]]
1793  except: # noqa: E722
1794  unique_identifier = '0'
1795 
1796  self.crosslinkedprots.add(c1)
1797  self.crosslinkedprots.add(c2)
1798 
1799  # construct the list of distances corresponding to the input rmf
1800  # files
1801 
1802  dists = []
1803  if filter_rmf_file_names is not None:
1804  for n, d in enumerate(fs[key]):
1805  if fs[rmf_file_key][n] in filter_rmf_file_names:
1806  dists.append(float(d))
1807  else:
1808  dists = [float(f) for f in fs[key]]
1809 
1810  # check if the input confidence class corresponds to the
1811  # one of the cross-link
1812 
1813  mdist = self.median(dists)
1814 
1815  stdv = np.std(np.array(dists))
1816  if self.mindist > mdist:
1817  self.mindist = mdist
1818  if self.maxdist < mdist:
1819  self.maxdist = mdist
1820 
1821  # calculate the frequency of unique cross-links within the same
1822  # sample
1823  if (r1, c1, r2, c2, mdist) not in cross_link_frequency_list:
1824  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1825  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1826  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1827  else:
1828  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1829  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1830  cross_link_frequency_list.append((r1, c1, r2, c2))
1831  cross_link_frequency_list.append((r2, c2, r1, c1))
1832  self.unique_cross_link_list.append(
1833  (r1, c1, r2, c2, mdist))
1834 
1835  if (r1, c1, r2, c2) not in self.cross_link_distances:
1836  self.cross_link_distances[(
1837  r1, c1, r2, c2, mdist, confidence)] = dists
1838  self.cross_link_distances[(
1839  r2, c2, r1, c1, mdist, confidence)] = dists
1840  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1841  else:
1842  self.cross_link_distances[(
1843  r2, c2, r1, c1, mdist, confidence)] += dists
1844  self.cross_link_distances[(
1845  r1, c1, r2, c2, mdist, confidence)] += dists
1846 
1847  self.crosslinks.append(
1848  (r1, c1, r2, c2, mdist, stdv, confidence, unique_identifier,
1849  'original'))
1850  self.crosslinks.append(
1851  (r2, c2, r1, c1, mdist, stdv, confidence, unique_identifier,
1852  'reversed'))
1853 
1854  self.cross_link_frequency_inverted = {}
1855  for xl in self.unique_cross_link_list:
1856  (r1, c1, r2, c2, mdist) = xl
1857  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1858  if frequency not in self.cross_link_frequency_inverted:
1859  self.cross_link_frequency_inverted[
1860  frequency] = [(r1, c1, r2, c2)]
1861  else:
1862  self.cross_link_frequency_inverted[
1863  frequency].append((r1, c1, r2, c2))
1864 
1865  # -------------
1866 
1867  def median(self, mylist):
1868  sorts = sorted(mylist)
1869  length = len(sorts)
1870  print(length)
1871  if length == 1:
1872  return mylist[0]
1873  if not length % 2:
1874  return (sorts[length / 2] + sorts[length / 2 - 1]) / 2.0
1875  return sorts[length / 2]
1876 
1877  def set_threshold(self, threshold):
1878  self.threshold = threshold
1879 
1880  def set_tolerance(self, tolerance):
1881  self.tolerance = tolerance
1882 
1883  def colormap(self, dist):
1884  if dist < self.threshold - self.tolerance:
1885  return "Green"
1886  elif dist >= self.threshold + self.tolerance:
1887  return "Orange"
1888  else:
1889  return "Red"
1890 
1891  def write_cross_link_database(self, filename, format='csv'):
1892  import csv
1893 
1894  fieldnames = [
1895  "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
1896  "Median Distance", "Standard Deviation", "Confidence",
1897  "Frequency", "Arrangement"]
1898 
1899  if self.external_csv_data is not None:
1900  keys = list(self.external_csv_data.keys())
1901  innerkeys = list(self.external_csv_data[keys[0]].keys())
1902  innerkeys.sort()
1903  fieldnames += innerkeys
1904 
1905  dw = csv.DictWriter(
1906  open(filename, "w"),
1907  delimiter=',',
1908  fieldnames=fieldnames)
1909  dw.writeheader()
1910  for xl in self.crosslinks:
1911  (r1, c1, r2, c2, mdist, stdv, confidence,
1912  unique_identifier, descriptor) = xl
1913  if descriptor == 'original':
1914  outdict = {}
1915  outdict["Unique ID"] = unique_identifier
1916  outdict["Protein1"] = c1
1917  outdict["Protein2"] = c2
1918  outdict["Residue1"] = r1
1919  outdict["Residue2"] = r2
1920  outdict["Median Distance"] = mdist
1921  outdict["Standard Deviation"] = stdv
1922  outdict["Confidence"] = confidence
1923  outdict["Frequency"] = self.cross_link_frequency[
1924  (r1, c1, r2, c2)]
1925  if c1 == c2:
1926  arrangement = "Intra"
1927  else:
1928  arrangement = "Inter"
1929  outdict["Arrangement"] = arrangement
1930  if self.external_csv_data is not None:
1931  outdict.update(self.external_csv_data[unique_identifier])
1932 
1933  dw.writerow(outdict)
1934 
1935  def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
1936  no_confidence_info=False, filter=None, layout="whole",
1937  crosslinkedonly=False, filename=None, confidence_classes=None,
1938  alphablend=0.1, scale_symbol_size=1.0, gap_between_components=0,
1939  rename_protein_map=None):
1940  # layout can be:
1941  # "lowerdiagonal" print only the lower diagonal plot
1942  # "upperdiagonal" print only the upper diagonal plot
1943  # "whole" print all
1944  # crosslinkedonly: plot only components that have cross-links
1945  # no_dist_info: if True will plot only the cross-links as grey spots
1946  # filter = tuple the tuple contains a keyword to be search
1947  # in the database
1948  # a relationship ">","==","<"
1949  # and a value
1950  # example ("ID_Score",">",40)
1951  # scale_symbol_size rescale the symbol for the cross-link
1952  # rename_protein_map is a dictionary to rename proteins
1953 
1954  import matplotlib as mpl
1955  mpl.use('Agg')
1956  import matplotlib.pyplot as plt
1957  import matplotlib.cm as cm
1958 
1959  fig = plt.figure(figsize=(10, 10))
1960  ax = fig.add_subplot(111)
1961 
1962  ax.set_xticks([])
1963  ax.set_yticks([])
1964 
1965  # set the list of proteins on the x axis
1966  if prot_listx is None:
1967  if crosslinkedonly:
1968  prot_listx = list(self.crosslinkedprots)
1969  else:
1970  prot_listx = list(self.prot_length_dict.keys())
1971  prot_listx.sort()
1972 
1973  nresx = gap_between_components + \
1974  sum([self.prot_length_dict[name]
1975  + gap_between_components for name in prot_listx])
1976 
1977  # set the list of proteins on the y axis
1978 
1979  if prot_listy is None:
1980  if crosslinkedonly:
1981  prot_listy = list(self.crosslinkedprots)
1982  else:
1983  prot_listy = list(self.prot_length_dict.keys())
1984  prot_listy.sort()
1985 
1986  nresy = gap_between_components + \
1987  sum([self.prot_length_dict[name]
1988  + gap_between_components for name in prot_listy])
1989 
1990  # this is the residue offset for each protein
1991  resoffsetx = {}
1992  resendx = {}
1993  res = gap_between_components
1994  for prot in prot_listx:
1995  resoffsetx[prot] = res
1996  res += self.prot_length_dict[prot]
1997  resendx[prot] = res
1998  res += gap_between_components
1999 
2000  resoffsety = {}
2001  resendy = {}
2002  res = gap_between_components
2003  for prot in prot_listy:
2004  resoffsety[prot] = res
2005  res += self.prot_length_dict[prot]
2006  resendy[prot] = res
2007  res += gap_between_components
2008 
2009  resoffsetdiagonal = {}
2010  res = gap_between_components
2011  for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2012  resoffsetdiagonal[prot] = res
2013  res += self.prot_length_dict[prot]
2014  res += gap_between_components
2015 
2016  # plot protein boundaries
2017 
2018  xticks = []
2019  xlabels = []
2020  for n, prot in enumerate(prot_listx):
2021  res = resoffsetx[prot]
2022  end = resendx[prot]
2023  for proty in prot_listy:
2024  resy = resoffsety[proty]
2025  endy = resendy[proty]
2026  ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
2027  ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
2028  xticks.append((float(res) + float(end)) / 2)
2029  if rename_protein_map is not None:
2030  if prot in rename_protein_map:
2031  prot = rename_protein_map[prot]
2032  xlabels.append(prot)
2033 
2034  yticks = []
2035  ylabels = []
2036  for n, prot in enumerate(prot_listy):
2037  res = resoffsety[prot]
2038  end = resendy[prot]
2039  for protx in prot_listx:
2040  resx = resoffsetx[protx]
2041  endx = resendx[protx]
2042  ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
2043  ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
2044  yticks.append((float(res) + float(end)) / 2)
2045  if rename_protein_map is not None:
2046  if prot in rename_protein_map:
2047  prot = rename_protein_map[prot]
2048  ylabels.append(prot)
2049 
2050  # plot the contact map
2051  print(prot_listx, prot_listy)
2052 
2053  if self.contactmap is not None:
2054  tmp_array = np.zeros((nresx, nresy))
2055 
2056  for px in prot_listx:
2057  print(px)
2058  for py in prot_listy:
2059  print(py)
2060  resx = resoffsety[px]
2061  lengx = resendx[px] - 1
2062  resy = resoffsety[py]
2063  lengy = resendy[py] - 1
2064  indexes_x = self.index_dictionary[px]
2065  minx = min(indexes_x)
2066  maxx = max(indexes_x)
2067  indexes_y = self.index_dictionary[py]
2068  miny = min(indexes_y)
2069  maxy = max(indexes_y)
2070 
2071  print(px, py, minx, maxx, miny, maxy)
2072 
2073  try:
2074  tmp_array[
2075  resx:lengx,
2076  resy:lengy] = self.contactmap[
2077  minx:maxx,
2078  miny:maxy]
2079  except: # noqa: E722
2080  continue
2081 
2082  ax.imshow(tmp_array,
2083  cmap=cm.binary,
2084  origin='lower',
2085  interpolation='nearest')
2086 
2087  ax.set_xticks(xticks)
2088  ax.set_xticklabels(xlabels, rotation=90)
2089  ax.set_yticks(yticks)
2090  ax.set_yticklabels(ylabels)
2091  ax.set_xlim(0, nresx)
2092  ax.set_ylim(0, nresy)
2093 
2094  # set the cross-links
2095 
2096  already_added_xls = []
2097 
2098  for xl in self.crosslinks:
2099 
2100  (r1, c1, r2, c2, mdist, stdv, confidence,
2101  unique_identifier, descriptor) = xl
2102 
2103  if confidence_classes is not None:
2104  if confidence not in confidence_classes:
2105  continue
2106 
2107  try:
2108  pos1 = r1 + resoffsetx[c1]
2109  except: # noqa: E722
2110  continue
2111  try:
2112  pos2 = r2 + resoffsety[c2]
2113  except: # noqa: E722
2114  continue
2115 
2116  if filter is not None:
2117  xldb = self.external_csv_data[unique_identifier]
2118  xldb.update({"Distance": mdist})
2119  xldb.update({"Distance_stdv": stdv})
2120 
2121  if filter[1] == ">":
2122  if float(xldb[filter[0]]) <= float(filter[2]):
2123  continue
2124 
2125  if filter[1] == "<":
2126  if float(xldb[filter[0]]) >= float(filter[2]):
2127  continue
2128 
2129  if filter[1] == "==":
2130  if float(xldb[filter[0]]) != float(filter[2]):
2131  continue
2132 
2133  # all that below is used for plotting the diagonal
2134  # when you have a rectangolar plots
2135 
2136  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
2137  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
2138 
2139  if layout == 'lowerdiagonal':
2140  if pos_for_diagonal1 <= pos_for_diagonal2:
2141  continue
2142  if layout == 'upperdiagonal':
2143  if pos_for_diagonal1 >= pos_for_diagonal2:
2144  continue
2145 
2146  already_added_xls.append((r1, c1, r2, c2))
2147 
2148  if not no_confidence_info:
2149  if confidence == '0.01':
2150  markersize = 14 * scale_symbol_size
2151  elif confidence == '0.05':
2152  markersize = 9 * scale_symbol_size
2153  elif confidence == '0.1':
2154  markersize = 6 * scale_symbol_size
2155  else:
2156  markersize = 15 * scale_symbol_size
2157  else:
2158  markersize = 5 * scale_symbol_size
2159 
2160  if not no_dist_info:
2161  color = self.colormap(mdist)
2162  else:
2163  color = "gray"
2164 
2165  ax.plot(
2166  [pos1],
2167  [pos2],
2168  'o',
2169  c=color,
2170  alpha=alphablend,
2171  markersize=markersize)
2172 
2173  fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
2174 
2175  for i in ax.spines.values():
2176  i.set_linewidth(2.0)
2177 
2178  if filename:
2179  plt.savefig(filename + ".pdf", dpi=300, transparent="False")
2180  else:
2181  plt.show()
2182 
2183  def get_frequency_statistics(self, prot_list,
2184  prot_list2=None):
2185 
2186  violated_histogram = {}
2187  satisfied_histogram = {}
2188  unique_cross_links = []
2189 
2190  for xl in self.unique_cross_link_list:
2191  (r1, c1, r2, c2, mdist) = xl
2192 
2193  # here we filter by the protein
2194  if prot_list2 is None:
2195  if c1 not in prot_list:
2196  continue
2197  if c2 not in prot_list:
2198  continue
2199  else:
2200  if c1 in prot_list and c2 in prot_list2:
2201  pass
2202  elif c1 in prot_list2 and c2 in prot_list:
2203  pass
2204  else:
2205  continue
2206 
2207  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2208 
2209  if (r1, c1, r2, c2) not in unique_cross_links:
2210  if mdist > 35.0:
2211  if frequency not in violated_histogram:
2212  violated_histogram[frequency] = 1
2213  else:
2214  violated_histogram[frequency] += 1
2215  else:
2216  if frequency not in satisfied_histogram:
2217  satisfied_histogram[frequency] = 1
2218  else:
2219  satisfied_histogram[frequency] += 1
2220  unique_cross_links.append((r1, c1, r2, c2))
2221  unique_cross_links.append((r2, c2, r1, c1))
2222 
2223  print("# satisfied")
2224 
2225  total_number_of_crosslinks = 0
2226 
2227  for i in satisfied_histogram:
2228  # if i in violated_histogram:
2229  # print i, satisfied_histogram[i]+violated_histogram[i]
2230  # else:
2231  if i in violated_histogram:
2232  print(i, violated_histogram[i]+satisfied_histogram[i])
2233  else:
2234  print(i, satisfied_histogram[i])
2235  total_number_of_crosslinks += i*satisfied_histogram[i]
2236 
2237  print("# violated")
2238 
2239  for i in violated_histogram:
2240  print(i, violated_histogram[i])
2241  total_number_of_crosslinks += i*violated_histogram[i]
2242 
2243  print(total_number_of_crosslinks)
2244 
2245  def print_cross_link_binary_symbols(self, prot_list,
2246  prot_list2=None):
2247  tmp_matrix = []
2248  confidence_list = []
2249  for xl in self.crosslinks:
2250  (r1, c1, r2, c2, mdist, stdv, confidence,
2251  unique_identifier, descriptor) = xl
2252 
2253  if prot_list2 is None:
2254  if c1 not in prot_list:
2255  continue
2256  if c2 not in prot_list:
2257  continue
2258  else:
2259  if c1 in prot_list and c2 in prot_list2:
2260  pass
2261  elif c1 in prot_list2 and c2 in prot_list:
2262  pass
2263  else:
2264  continue
2265 
2266  if descriptor != "original":
2267  continue
2268 
2269  confidence_list.append(confidence)
2270 
2271  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2272  tmp_dist_binary = []
2273  for d in dists:
2274  if d < 35:
2275  tmp_dist_binary.append(1)
2276  else:
2277  tmp_dist_binary.append(0)
2278  tmp_matrix.append(tmp_dist_binary)
2279 
2280  matrix = list(zip(*tmp_matrix))
2281 
2282  satisfied_high_sum = 0
2283  satisfied_mid_sum = 0
2284  satisfied_low_sum = 0
2285  total_satisfied_sum = 0
2286  for k, m in enumerate(matrix):
2287  satisfied_high = 0
2288  total_high = 0
2289  satisfied_mid = 0
2290  total_mid = 0
2291  satisfied_low = 0
2292  total_low = 0
2293  total_satisfied = 0
2294  total = 0
2295  for n, b in enumerate(m):
2296  if confidence_list[n] == "0.01":
2297  total_high += 1
2298  if b == 1:
2299  satisfied_high += 1
2300  satisfied_high_sum += 1
2301  elif confidence_list[n] == "0.05":
2302  total_mid += 1
2303  if b == 1:
2304  satisfied_mid += 1
2305  satisfied_mid_sum += 1
2306  elif confidence_list[n] == "0.1":
2307  total_low += 1
2308  if b == 1:
2309  satisfied_low += 1
2310  satisfied_low_sum += 1
2311  if b == 1:
2312  total_satisfied += 1
2313  total_satisfied_sum += 1
2314  total += 1
2315  print(k, satisfied_high, total_high)
2316  print(k, satisfied_mid, total_mid)
2317  print(k, satisfied_low, total_low)
2318  print(k, total_satisfied, total)
2319  print(float(satisfied_high_sum) / len(matrix))
2320  print(float(satisfied_mid_sum) / len(matrix))
2321  print(float(satisfied_low_sum) / len(matrix))
2322 # ------------
2323 
2324  def get_unique_crosslinks_statistics(self, prot_list,
2325  prot_list2=None):
2326 
2327  print(prot_list)
2328  print(prot_list2)
2329  satisfied_high = 0
2330  total_high = 0
2331  satisfied_mid = 0
2332  total_mid = 0
2333  satisfied_low = 0
2334  total_low = 0
2335  total = 0
2336  tmp_matrix = []
2337  satisfied_string = []
2338  for xl in self.crosslinks:
2339  (r1, c1, r2, c2, mdist, stdv, confidence,
2340  unique_identifier, descriptor) = xl
2341 
2342  if prot_list2 is None:
2343  if c1 not in prot_list:
2344  continue
2345  if c2 not in prot_list:
2346  continue
2347  else:
2348  if c1 in prot_list and c2 in prot_list2:
2349  pass
2350  elif c1 in prot_list2 and c2 in prot_list:
2351  pass
2352  else:
2353  continue
2354 
2355  if descriptor != "original":
2356  continue
2357 
2358  total += 1
2359  if confidence == "0.01":
2360  total_high += 1
2361  if mdist <= 35:
2362  satisfied_high += 1
2363  if confidence == "0.05":
2364  total_mid += 1
2365  if mdist <= 35:
2366  satisfied_mid += 1
2367  if confidence == "0.1":
2368  total_low += 1
2369  if mdist <= 35:
2370  satisfied_low += 1
2371  if mdist <= 35:
2372  satisfied_string.append(1)
2373  else:
2374  satisfied_string.append(0)
2375 
2376  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2377  tmp_dist_binary = []
2378  for d in dists:
2379  if d < 35:
2380  tmp_dist_binary.append(1)
2381  else:
2382  tmp_dist_binary.append(0)
2383  tmp_matrix.append(tmp_dist_binary)
2384 
2385  print("unique satisfied_high/total_high", satisfied_high, "/",
2386  total_high)
2387  print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
2388  print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
2389  print("total", total)
2390 
2391  matrix = list(zip(*tmp_matrix))
2392  satisfied_models = 0
2393  satstr = ""
2394  for b in satisfied_string:
2395  if b == 0:
2396  satstr += "-"
2397  if b == 1:
2398  satstr += "*"
2399 
2400  for m in matrix:
2401  all_satisfied = True
2402  string = ""
2403  for n, b in enumerate(m):
2404  if b == 0:
2405  string += "0"
2406  if b == 1:
2407  string += "1"
2408  if b == 1 and satisfied_string[n] == 1:
2409  pass
2410  elif b == 1 and satisfied_string[n] == 0:
2411  pass
2412  elif b == 0 and satisfied_string[n] == 0:
2413  pass
2414  elif b == 0 and satisfied_string[n] == 1:
2415  all_satisfied = False
2416  if all_satisfied:
2417  satisfied_models += 1
2418  print(string)
2419  print(satstr, all_satisfied)
2420  print("models that satisfies the median satisfied cross-links/total "
2421  "models", satisfied_models, len(matrix))
2422 
2423  def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
2424  prot_list2=None):
2425 
2426  import matplotlib as mpl
2427  mpl.use('Agg')
2428  import matplotlib.pylab as pl
2429 
2430  tmp_matrix = []
2431  for kw in self.cross_link_distances_unique:
2432  (r1, c1, r2, c2) = kw
2433  dists = self.cross_link_distances_unique[kw]
2434 
2435  if prot_list2 is None:
2436  if c1 not in prot_list:
2437  continue
2438  if c2 not in prot_list:
2439  continue
2440  else:
2441  if c1 in prot_list and c2 in prot_list2:
2442  pass
2443  elif c1 in prot_list2 and c2 in prot_list:
2444  pass
2445  else:
2446  continue
2447  # append the sum of dists to order by that in the matrix plot
2448  dists.append(sum(dists))
2449  tmp_matrix.append(dists)
2450 
2451  tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
2452 
2453  # print len(tmp_matrix), len(tmp_matrix[0])-1
2454  matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
2455 
2456  for i in range(len(tmp_matrix)):
2457  for k in range(len(tmp_matrix[i]) - 1):
2458  matrix[i][k] = tmp_matrix[i][k]
2459 
2460  print(matrix)
2461 
2462  fig = pl.figure()
2463  ax = fig.add_subplot(211)
2464 
2465  cax = ax.imshow(matrix, interpolation='nearest')
2466  fig.colorbar(cax)
2467  pl.savefig(figurename, dpi=300)
2468  pl.show()
2469 
2470  def plot_bars(
2471  self,
2472  filename,
2473  prots1,
2474  prots2,
2475  nxl_per_row=20,
2476  arrangement="inter",
2477  confidence_input="None"):
2478 
2479  data = []
2480  for xl in self.cross_link_distances:
2481  (r1, c1, r2, c2, mdist, confidence) = xl
2482  if c1 in prots1 and c2 in prots2:
2483  if arrangement == "inter" and c1 == c2:
2484  continue
2485  if arrangement == "intra" and c1 != c2:
2486  continue
2487  if confidence_input == confidence:
2488  label = str(c1) + ":" + str(r1) + \
2489  "-" + str(c2) + ":" + str(r2)
2490  values = self.cross_link_distances[xl]
2491  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2492  data.append((label, values, mdist, frequency))
2493 
2494  sort_by_dist = sorted(data, key=lambda tup: tup[2])
2495  sort_by_dist = list(zip(*sort_by_dist))
2496  values = sort_by_dist[1]
2497  positions = list(range(len(values)))
2498  labels = sort_by_dist[0]
2499  frequencies = list(map(float, sort_by_dist[3]))
2500  frequencies = [f * 10.0 for f in frequencies]
2501 
2502  nchunks = int(float(len(values)) / nxl_per_row)
2503  values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
2504  positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
2505  positions,
2506  nchunks)
2507  frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
2508  frequencies,
2509  nchunks)
2510  labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)
2511 
2512  for n, v in enumerate(values_chunks):
2513  p = positions_chunks[n]
2514  f = frequencies_chunks[n]
2516  filename + "." + str(n), v, p, f,
2517  valuename="Distance (Ang)",
2518  positionname="Unique " + arrangement + " Crosslinks",
2519  xlabels=labels_chunks[n])
2520 
2521  def crosslink_distance_histogram(self, filename,
2522  prot_list=None,
2523  prot_list2=None,
2524  confidence_classes=None,
2525  bins=40,
2526  color='#66CCCC',
2527  yplotrange=[0, 1],
2528  format="png",
2529  normalized=False):
2530  if prot_list is None:
2531  prot_list = list(self.prot_length_dict.keys())
2532 
2533  distances = []
2534  for xl in self.crosslinks:
2535  (r1, c1, r2, c2, mdist, stdv, confidence,
2536  unique_identifier, descriptor) = xl
2537 
2538  if confidence_classes is not None:
2539  if confidence not in confidence_classes:
2540  continue
2541 
2542  if prot_list2 is None:
2543  if c1 not in prot_list:
2544  continue
2545  if c2 not in prot_list:
2546  continue
2547  else:
2548  if c1 in prot_list and c2 in prot_list2:
2549  pass
2550  elif c1 in prot_list2 and c2 in prot_list:
2551  pass
2552  else:
2553  continue
2554 
2555  distances.append(mdist)
2556 
2558  filename, distances, valuename="C-alpha C-alpha distance [Ang]",
2559  bins=bins, color=color,
2560  format=format,
2561  reference_xline=35.0,
2562  yplotrange=yplotrange, normalized=normalized)
2563 
2564  def scatter_plot_xl_features(self, filename,
2565  feature1=None,
2566  feature2=None,
2567  prot_list=None,
2568  prot_list2=None,
2569  yplotrange=None,
2570  reference_ylines=None,
2571  distance_color=True,
2572  format="png"):
2573  import matplotlib as mpl
2574  mpl.use('Agg')
2575  import matplotlib.pyplot as plt
2576 
2577  fig = plt.figure(figsize=(10, 10))
2578  ax = fig.add_subplot(111)
2579 
2580  for xl in self.crosslinks:
2581  (r1, c1, r2, c2, mdist, stdv, confidence,
2582  unique_identifier, arrangement) = xl
2583 
2584  if prot_list2 is None:
2585  if c1 not in prot_list:
2586  continue
2587  if c2 not in prot_list:
2588  continue
2589  else:
2590  if c1 in prot_list and c2 in prot_list2:
2591  pass
2592  elif c1 in prot_list2 and c2 in prot_list:
2593  pass
2594  else:
2595  continue
2596 
2597  xldb = self.external_csv_data[unique_identifier]
2598  xldb.update({"Distance": mdist})
2599  xldb.update({"Distance_stdv": stdv})
2600 
2601  xvalue = float(xldb[feature1])
2602  yvalue = float(xldb[feature2])
2603 
2604  if distance_color:
2605  color = self.colormap(mdist)
2606  else:
2607  color = "gray"
2608 
2609  ax.plot([xvalue], [yvalue], 'o', c=color, alpha=0.1, markersize=7)
2610 
2611  if yplotrange is not None:
2612  ax.set_ylim(yplotrange)
2613  if reference_ylines is not None:
2614  for rl in reference_ylines:
2615  ax.axhline(rl, color='red', linestyle='dashed', linewidth=1)
2616 
2617  if filename:
2618  plt.savefig(filename + "." + format, dpi=150, transparent="False")
2619 
2620  plt.show()
def get_output
Returns output for IMP.pmi.output.Output object.
Simple 3D transformation class.
Visualization of cross-links.
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
double get_drms(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
A class for reading stat files (either rmf or ascii v1 and v2)
Definition: output.py:931
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
def plot_field_histogram
Plot a list of histograms from a value list.
Definition: output.py:1754
def plot_fields_box_plots
Plot time series as boxplots.
Definition: output.py:1819
double get_drmsd(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1)
Calculate distance the root mean square deviation between two sets of 3D points.
Definition: atom/distance.h:49
def get_molecules
This function returns the parent molecule hierarchies of given objects.
Definition: tools.py:1159
Miscellaneous utilities.
Definition: tools.py:1
def __init__
Constructor.
void handle_use_deprecated(std::string message)
Break in this method in gdb to find deprecated uses at runtime.
double get_weighted_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2, const Floats &weights)
Compute the RMSD (without alignment) taking into account the copy ambiguity.
void link_restraints(RMF::FileConstHandle fh, const Restraints &hs)
A class to evaluate the precision of an ensemble.
log
Definition: log.py:1
A class to cluster structures.
def get_rmsd_wrt_reference_structure_with_alignment
First align then calculate RMSD.
def __init__
Constructor.
Definition: pmi/Analysis.py:30
Class for sampling a density map from particles.
A decorator for keeping track of copies of a molecule.
Definition: Copy.h:28
def add_structure
Read a structure into the ensemble and store (as coordinates).
DensityMap * multiply(const DensityMap *m1, const DensityMap *m2)
Return a density map for which voxel i contains the result of m1[i]*m2[i].
Performs alignment and RMSD calculation for two sets of coordinates.
Definition: pmi/Analysis.py:21
def add_subunits_density
Add a frame to the densities.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:678
def scatter_and_gather
Synchronize data over a parallel run.
Definition: tools.py:542
double get_rmsd(const Vector3DsOrXYZs0 &m1, const Vector3DsOrXYZs1 &m2)
Basic utilities for handling cryo-electron microscopy 3D density maps.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
def get_particles_at_resolution_one
Get particles at res 1, or any beads, based on the name.
def fill
Add coordinates for a single model.
Tools for clustering and cluster analysis.
Definition: pmi/Analysis.py:1
def do_cluster
Run K-means clustering.
3D rotation class.
Definition: Rotation3D.h:52
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:509
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Definition: output.py:1
def set_reference_structure
Read in a structure used for reference computation.
def get_rmsf
Calculate the residue mean square fluctuations (RMSF).
def get_average_distance_wrt_reference_structure
Compare the structure set to the reference structure.
def get_density
Get the current density for some component name.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
The general base class for IMP exceptions.
Definition: exception.h:48
def get_precision
Evaluate the precision of two named structure groups.
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Restraints create_restraints(RMF::FileConstHandle fh, Model *m)
def get_moldict_coorddict
return data structure for the RMSD calculation
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
def get_particles_at_resolution_ten
Get particles at res 10, or any beads, based on the name.
Python classes to represent, score, sample and analyze models.
def add_structures
Read a list of RMFs, supports parallel.
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)
Hierarchies get_leaves(const Selection &h)
double get_drmsd_Q(const Vector3DsOrXYZs0 &m0, const Vector3DsOrXYZs1 &m1, double threshold)
Definition: atom/distance.h:85
Select hierarchy particles identified by the biological name.
Definition: Selection.h:70
Compute mean density maps from structures.
Support for the RMF file format for storing hierarchical molecular data and markup.
def get_residue_indexes
Retrieve the residue indexes for the given particle.
Definition: tools.py:499
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27