IMP Reference Guide  2.17.0
The Integrative Modeling Platform
pmi/Analysis.py
1 #!/usr/bin/env python
2 
3 """@namespace IMP.pmi.analysis
4  Tools for clustering and cluster analysis
5 """
6 from __future__ import print_function
7 import IMP
8 import IMP.algebra
9 import IMP.em
10 import IMP.pmi
11 import IMP.pmi.tools
12 import IMP.pmi.output
13 import IMP.rmf
14 import RMF
15 import IMP.pmi.analysis
16 from operator import itemgetter
17 from math import log, sqrt
18 import itertools
19 import numpy as np
20 
21 
22 class Alignment(object):
23  """Performs alignment and RMSD calculation for two sets of coordinates
24 
25  The class also takes into account non-equal stoichiometry of the proteins.
26  If this is the case, the protein names of proteins in multiple copies
27  should be specified in the following form:
28  nameA..1, nameA..2 (note two dots).
29  """
30 
31  def __init__(self, template, query, weights=None):
32  """Constructor.
33  @param template {'p1':coords(L,3), 'p2':coords(L,3)}
34  @param query {'p1':coords(L,3), 'p2':coords(L,3)}
35  @param weights optional weights for each set of coordinates
36  """
37  self.query = query
38  self.template = template
39  self.weights = weights
40 
41  if len(self.query.keys()) != len(self.template.keys()):
42  raise ValueError('''the number of proteins
43  in template and query does not match!''')
44 
45  def permute(self):
46  # get unique protein names for each protein
47  # this is, unfortunately, expecting that names are all 'Molname..X'
48  # where X is different for each copy.
49  self.proteins = sorted(self.query.keys())
50  prots_uniq = [i.split('..')[0] for i in self.proteins]
51 
52  # for each unique name, store list of permutations
53  # e.g. for keys A..1,A..2 store P[A] = [[A..1,A..2],[A..2,A..1]]
54  # then store the product: [[[A1,A2],[B1,B2]],[[A1,A2],[B2,B1]],
55  # [[A2,A1],[B1,B2]],[[A2,A1],[B2,B1]]]
56  P = {}
57  for p in prots_uniq:
58  copies = [i for i in self.proteins if i.split('..')[0] == p]
59  prmts = list(itertools.permutations(copies, len(copies)))
60  P[p] = prmts
61  self.P = P
62  self.Product = list(itertools.product(*P.values()))
63 
64  def get_rmsd(self):
65 
66  self.permute()
67 
68  template_xyz = []
69  weights = []
70  torder = sum([list(i) for i in self.Product[0]], [])
71  for t in torder:
72  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
73  if self.weights is not None:
74  weights += [i for i in self.weights[t]]
75 
76  self.rmsd = 10000000000.
77  for comb in self.Product:
78 
79  order = sum([list(i) for i in comb], [])
80  query_xyz = []
81  for p in order:
82  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
83 
84  if self.weights is not None:
85  dist = IMP.algebra.get_weighted_rmsd(
86  template_xyz, query_xyz, weights)
87  else:
88  dist = IMP.algebra.get_rmsd(template_xyz, query_xyz)
89  if dist < self.rmsd:
90  self.rmsd = dist
91  return self.rmsd
92 
93  def align(self):
94  from scipy.spatial.distance import cdist
95 
96  self.permute()
97 
98  # create flat coordinate list from template in standard order
99  # then loop through the permutations and try to align and get RMSD
100  # should allow you to try all mappings within each protein
101  template_xyz = []
102  torder = sum([list(i) for i in self.Product[0]], [])
103  for t in torder:
104  template_xyz += [IMP.algebra.Vector3D(i) for i in self.template[t]]
105  # template_xyz = np.array(template_xyz)
106  self.rmsd, Transformation = 10000000000., ''
107 
108  # then for each permutation, get flat list of coords and get RMSD
109  for comb in self.Product:
110  order = sum([list(i) for i in comb], [])
111  query_xyz = []
112  for p in order:
113  query_xyz += [IMP.algebra.Vector3D(i) for i in self.query[p]]
114 
115  if len(template_xyz) != len(query_xyz):
116  raise ValueError('''the number of coordinates
117  in template and query does not match!''')
118 
119  transformation = \
120  IMP.algebra.get_transformation_aligning_first_to_second(
121  query_xyz, template_xyz)
122  query_xyz_tr = [transformation.get_transformed(n)
123  for n in query_xyz]
124 
125  dist = sqrt(
126  sum(np.diagonal(cdist(template_xyz, query_xyz_tr) ** 2))
127  / len(template_xyz))
128  if dist < self.rmsd:
129  self.rmsd = dist
130  Transformation = transformation
131 
132  # return the transformation
133  return (self.rmsd, Transformation)
134 
135 
136 # TEST for the alignment ###
137 """
138 Proteins = {'a..1':np.array([np.array([-1.,1.])]),
139  'a..2':np.array([np.array([1.,1.,])]),
140  'a..3':np.array([np.array([-2.,1.])]),
141  'b':np.array([np.array([0.,-1.])]),
142  'c..1':np.array([np.array([-1.,-1.])]),
143  'c..2':np.array([np.array([1.,-1.])]),
144  'd':np.array([np.array([0.,0.])]),
145  'e':np.array([np.array([0.,1.])])}
146 
147 Ali = Alignment(Proteins, Proteins)
148 Ali.permute()
149 if Ali.get_rmsd() == 0.0: print('successful test!')
150 else: print('ERROR!'); exit()
151 """
152 
153 
154 # ----------------------------------
155 class Violations(object):
156 
157  def __init__(self, filename):
158 
159  self.violation_thresholds = {}
160  self.violation_counts = {}
161 
162  data = open(filename)
163  D = data.readlines()
164  data.close()
165 
166  for d in D:
167  d = d.strip().split()
168  self.violation_thresholds[d[0]] = float(d[1])
169 
170  def get_number_violated_restraints(self, rsts_dict):
171  num_violated = 0
172  for rst in self.violation_thresholds:
173  if rst not in rsts_dict:
174  continue # print rst;
175  if float(rsts_dict[rst]) > self.violation_thresholds[rst]:
176  num_violated += 1
177  if rst not in self.violation_counts:
178  self.violation_counts[rst] = 1
179  else:
180  self.violation_counts[rst] += 1
181  return num_violated
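    # Usage sketch for Violations (the file name and the stat dictionary are
    # hypothetical). The input file is expected to contain one
    # "restraint_name threshold" pair per line:
    #
    #   viol = Violations("violation_thresholds.txt")
    #   # rsts_dict maps restraint names to their current values,
    #   # e.g. one row of a stat file
    #   num_violated = viol.get_number_violated_restraints(rsts_dict)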
182 
183 
184 class Clustering(object):
185  """A class to cluster structures.
186  Uses scipy's cdist function to compute distance matrices
187  and sklearn's KMeans module for clustering.
188  """
189  def __init__(self, rmsd_weights=None):
190  """Constructor.
191  @param rmsd_weights Flat list of weights for each particle
192  (if they're coarse)
193  """
194  try:
195  from mpi4py import MPI
196  self.comm = MPI.COMM_WORLD
197  self.rank = self.comm.Get_rank()
198  self.number_of_processes = self.comm.size
199  except ImportError:
200  self.number_of_processes = 1
201  self.rank = 0
202  self.all_coords = {}
203  self.structure_cluster_ids = None
204  self.tmpl_coords = None
205  self.rmsd_weights = rmsd_weights
206 
207  def set_template(self, part_coords):
208  self.tmpl_coords = part_coords
209 
210  def fill(self, frame, Coords):
211  """Add coordinates for a single model."""
212  self.all_coords[frame] = Coords
213 
214  def dist_matrix(self):
215  self.model_list_names = list(self.all_coords.keys())
216  self.model_indexes = list(range(len(self.model_list_names)))
217  self.model_indexes_dict = dict(
218  list(zip(self.model_list_names, self.model_indexes)))
219  model_indexes_unique_pairs = \
220  list(itertools.combinations(self.model_indexes, 2))
221 
222  my_model_indexes_unique_pairs = IMP.pmi.tools.chunk_list_into_segments(
223  model_indexes_unique_pairs,
224  self.number_of_processes)[self.rank]
225 
226  print("process %s assigned with %s pairs"
227  % (str(self.rank), str(len(my_model_indexes_unique_pairs))))
228 
229  (raw_distance_dict, self.transformation_distance_dict) = \
230  self.matrix_calculation(self.all_coords, self.tmpl_coords,
231  my_model_indexes_unique_pairs)
232 
233  if self.number_of_processes > 1:
234  raw_distance_dict = IMP.pmi.tools.scatter_and_gather(
235  raw_distance_dict)
236  pickable_transformations = \
237  self.get_pickable_transformation_distance_dict()
238  pickable_transformations = IMP.pmi.tools.scatter_and_gather(
239  pickable_transformations)
240  self.set_transformation_distance_dict_from_pickable(
241  pickable_transformations)
242 
243  self.raw_distance_matrix = np.zeros(
244  (len(self.model_list_names), len(self.model_list_names)))
245  for item in raw_distance_dict:
246  (f1, f2) = item
247  self.raw_distance_matrix[f1, f2] = raw_distance_dict[item]
248  self.raw_distance_matrix[f2, f1] = raw_distance_dict[item]
249 
250  def get_dist_matrix(self):
251  return self.raw_distance_matrix
252 
253  def do_cluster(self, number_of_clusters, seed=None):
254  """Run K-means clustering
255  @param number_of_clusters Number of clusters (the K in K-means)
256  @param seed The random seed
257  """
258  from sklearn.cluster import KMeans
259  if seed is not None:
260  np.random.seed(seed)
261  try:
262  # check whether we have the right version of sklearn
263  kmeans = KMeans(n_clusters=number_of_clusters)
264  except TypeError:
265  # sklearn older than 0.12
266  kmeans = KMeans(k=number_of_clusters)
267  kmeans.fit_predict(self.raw_distance_matrix)
268 
269  self.structure_cluster_ids = kmeans.labels_
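    # Typical workflow sketch (names are illustrative; coords_by_frame is a
    # hypothetical dict mapping frame identifiers to per-protein coordinates):
    #
    #   clustering = Clustering(rmsd_weights=None)
    #   clustering.set_template(template_coords)  # or skip for no alignment
    #   for frame, coords in coords_by_frame.items():
    #       clustering.fill(frame, coords)
    #   clustering.dist_matrix()         # distributed over MPI ranks if any
    #   clustering.do_cluster(number_of_clusters=3)
    #   for label in clustering.get_cluster_labels():
    #       print(label, clustering.get_cluster_label_size(label),
    #             clustering.get_cluster_label_average_rmsd(label))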
270 
271  def get_pickable_transformation_distance_dict(self):
272  pickable_transformations = {}
273  for label in self.transformation_distance_dict:
274  tr = self.transformation_distance_dict[label]
275  trans = tuple(tr.get_translation())
276  rot = tuple(tr.get_rotation().get_quaternion())
277  pickable_transformations[label] = (rot, trans)
278  return pickable_transformations
279 
280  def set_transformation_distance_dict_from_pickable(
281  self,
282  pickable_transformations):
283  self.transformation_distance_dict = {}
284  for label in pickable_transformations:
285  tr = pickable_transformations[label]
286  trans = IMP.algebra.Vector3D(tr[1])
287  rot = IMP.algebra.Rotation3D(tr[0])
288  self.transformation_distance_dict[
289  label] = IMP.algebra.Transformation3D(rot, trans)
290 
291  def save_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
292  import pickle
293  outf = open(file_name + ".data", 'wb')
294 
295  # to pickle the transformation dictionary
296  # you have to save the arrays corresponding to
297  # the transformations
298 
299  pickable_transformations = \
300  self.get_pickable_transformation_distance_dict()
301  pickle.dump(
302  (self.structure_cluster_ids,
303  self.model_list_names,
304  pickable_transformations),
305  outf)
306 
307  np.save(file_name + ".npy", self.raw_distance_matrix)
308 
309  def load_distance_matrix_file(self, file_name='cluster.rawmatrix.pkl'):
310  import pickle
311 
312  inputf = open(file_name + ".data", 'rb')
313  (self.structure_cluster_ids, self.model_list_names,
314  pickable_transformations) = pickle.load(inputf)
315  inputf.close()
316 
317  self.raw_distance_matrix = np.load(file_name + ".npy")
318 
319  self.set_transformation_distance_dict_from_pickable(
320  pickable_transformations)
321  self.model_indexes = list(range(len(self.model_list_names)))
322  self.model_indexes_dict = dict(
323  list(zip(self.model_list_names, self.model_indexes)))
324 
325  def plot_matrix(self, figurename="clustermatrix.pdf"):
326  import matplotlib as mpl
327  mpl.use('Agg')
328  import matplotlib.pylab as pl
329  from scipy.cluster import hierarchy as hrc
330 
331  fig = pl.figure(figsize=(10, 8))
332  ax = fig.add_subplot(212)
333  dendrogram = hrc.dendrogram(
334  hrc.linkage(self.raw_distance_matrix),
335  color_threshold=7,
336  no_labels=True)
337  leaves_order = dendrogram['leaves']
338  ax.set_xlabel('Model')
339  ax.set_ylabel('RMSD [Angstroms]')
340 
341  ax2 = fig.add_subplot(221)
342  cax = ax2.imshow(
343  self.raw_distance_matrix[leaves_order,
344  :][:,
345  leaves_order],
346  interpolation='nearest')
347  cb = fig.colorbar(cax)
348  cb.set_label('RMSD [Angstroms]')
349  ax2.set_xlabel('Model')
350  ax2.set_ylabel('Model')
351 
352  pl.savefig(figurename, dpi=300)
353  pl.close(fig)
354 
355  def get_model_index_from_name(self, name):
356  return self.model_indexes_dict[name]
357 
358  def get_cluster_labels(self):
359  # return the list of unique cluster labels
360  return list(set(self.structure_cluster_ids))
361 
362  def get_number_of_clusters(self):
363  return len(self.get_cluster_labels())
364 
365  def get_cluster_label_indexes(self, label):
366  return (
367  [i for i, l in enumerate(self.structure_cluster_ids) if l == label]
368  )
369 
370  def get_cluster_label_names(self, label):
371  return (
372  [self.model_list_names[i]
373  for i in self.get_cluster_label_indexes(label)]
374  )
375 
376  def get_cluster_label_average_rmsd(self, label):
377 
378  indexes = self.get_cluster_label_indexes(label)
379 
380  if len(indexes) > 1:
381  sub_distance_matrix = self.raw_distance_matrix[
382  indexes, :][:, indexes]
383  average_rmsd = np.sum(sub_distance_matrix) / \
384  (len(sub_distance_matrix)
385  ** 2 - len(sub_distance_matrix))
386  else:
387  average_rmsd = 0.0
388  return average_rmsd
389 
390  def get_cluster_label_size(self, label):
391  return len(self.get_cluster_label_indexes(label))
392 
393  def get_transformation_to_first_member(
394  self,
395  cluster_label,
396  structure_index):
397  reference = self.get_cluster_label_indexes(cluster_label)[0]
398  return self.transformation_distance_dict[(reference, structure_index)]
399 
400  def matrix_calculation(self, all_coords, template_coords, list_of_pairs):
401 
402  model_list_names = list(all_coords.keys())
403  rmsd_protein_names = list(all_coords[model_list_names[0]].keys())
404  raw_distance_dict = {}
405  transformation_distance_dict = {}
406  if template_coords is None:
407  do_alignment = False
408  else:
409  do_alignment = True
410  alignment_template_protein_names = list(template_coords.keys())
411 
412  for (f1, f2) in list_of_pairs:
413 
414  if not do_alignment:
415  # here we only get the rmsd,
416  # we need that for instance when you want to cluster
417  # conformations globally, e.g. when an EM map is the reference
418  transformation = IMP.algebra.get_identity_transformation_3d()
419 
420  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
421  for pr in rmsd_protein_names])
422  coords_f2 = {}
423  for pr in rmsd_protein_names:
424  coords_f2[pr] = all_coords[model_list_names[f2]][pr]
425 
426  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
427  rmsd = Ali.get_rmsd()
428 
429  elif do_alignment:
430  # here we actually align the conformations first
431  # and then calculate the rmsd. We need that when a
432  # protein (or set of proteins) is the reference
433  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
434  for pr in alignment_template_protein_names])
435  coords_f2 = dict([(pr, all_coords[model_list_names[f2]][pr])
436  for pr in alignment_template_protein_names])
437 
438  Ali = Alignment(coords_f1, coords_f2)
439  template_rmsd, transformation = Ali.align()
440 
441  # here we calculate the rmsd
442  # we will align two models based on the number of subunits
443  # provided and transform coordinates of model 2 to model 1
444  coords_f1 = dict([(pr, all_coords[model_list_names[f1]][pr])
445  for pr in rmsd_protein_names])
446  coords_f2 = {}
447  for pr in rmsd_protein_names:
448  coords_f2[pr] = [transformation.get_transformed(
449  i) for i in all_coords[model_list_names[f2]][pr]]
450 
451  Ali = Alignment(coords_f1, coords_f2, self.rmsd_weights)
452  rmsd = Ali.get_rmsd()
453 
454  raw_distance_dict[(f1, f2)] = rmsd
455  raw_distance_dict[(f2, f1)] = rmsd
456  transformation_distance_dict[(f1, f2)] = transformation
457  transformation_distance_dict[(f2, f1)] = transformation
458 
459  return raw_distance_dict, transformation_distance_dict
460 
461 
462 class RMSD(object):
463  """Compute the RMSD (without alignment) taking into account the copy
464  ambiguity.
465  To be used with pmi2 hierarchies. Can be used for instance as follows:
466 
467  rmsd = IMP.pmi.analysis.RMSD(
468  hier, hier, [mol.get_name() for mol in mols],
469  dynamic0=True, dynamic1=False)
470  output_objects.append(rmsd)
471 
472  before shuffling the coordinates
473  """
474 
475  def __init__(self, hier0, hier1, molnames, label="None", dynamic0=True,
476  dynamic1=True, metric=IMP.algebra.get_rmsd):
477  """
478  @param hier0 first input hierarchy
479  @param hier1 second input hierarchy
480  @param molnames the names of the molecules used for the RMSD
481  @param dynamic0 if True, store the XYZ decorators so that the
482  coordinates of hier0 can be updated; if False, coordinates
483  are static (stored as Vector3Ds) and never updated
484  @param dynamic1 same as dynamic0, but for hier1
485  @param metric the function used to compare coordinates
486  """
487  self.moldict0, self.molcoords0, self.mol_XYZs0 = \
488  self.get_moldict_coorddict(hier0, molnames)
489  self.moldict1, self.molcoords1, self.mol_XYZs1 = \
490  self.get_moldict_coorddict(hier1, molnames)
491  self.dynamic0 = dynamic0
492  self.dynamic1 = dynamic1
493  self.metric = metric
494  self.label = label
495 
496  def get_moldict_coorddict(self, hier, molnames):
497  """return data structure for the RMSD calculation"""
498  moldict = {}
499  mol_coords = {}
500  mol_XYZs = {}
501  for mol in IMP.pmi.tools.get_molecules(hier):
502  name = mol.get_name()
503  if name not in molnames:
504  continue
505  parts = True
506  mol_coords[mol] = []
507  mol_XYZs[mol] = []
508  i = 1
509  while parts:
510  sel = IMP.atom.Selection(
511  mol, residue_index=i,
512  representation_type=IMP.atom.BALLS,
513  resolution=1)
514  parts = sel.get_selected_particles()
515  if parts:
516  mol_coords[mol].append(
517  IMP.core.XYZ(parts[0]).get_coordinates())
518  mol_XYZs[mol].append(IMP.core.XYZ(parts[0]))
519  i = i+1
520  if name in moldict:
521  moldict[name].append(mol)
522  else:
523  moldict[name] = [mol]
524  return moldict, mol_coords, mol_XYZs
525 
526  def get_rmsd_and_assigments(self):
527  total_rmsd = 0
528  total_N = 0
529  best_assignments = []
530  rmsd_dict = {}
531  for molname, ref_mols in self.moldict1.items():
532  ref_coords = []
533  for ref_mol in ref_mols:
534  if self.dynamic1:
535  coord1 = [XYZ.get_coordinates()
536  for XYZ in self.mol_XYZs1[ref_mol]]
537  else:
538  coord1 = self.molcoords1[ref_mol]
539  ref_coords += coord1
540 
541  rmsd = []
542  rmf_mols_list = []
543  for rmf_mols in itertools.permutations(self.moldict0[molname]):
544  rmf_coords = []
545  for rmf_mol in rmf_mols:
546  if self.dynamic0:
547  coord0 = [XYZ.get_coordinates()
548  for XYZ in self.mol_XYZs0[rmf_mol]]
549  else:
550  coord0 = self.molcoords0[rmf_mol]
551  rmf_coords += coord0
552 
553  rmsd.append(IMP.algebra.get_rmsd(ref_coords, rmf_coords))
554  rmf_mols_list.append(rmf_mols)
555  m = min(rmsd)
556  rmf_mols_best_order = rmf_mols_list[rmsd.index(m)]
557 
558  for n, (ref_mol, rmf_mol) in enumerate(
559  zip(ref_mols, rmf_mols_best_order)):
560  best_assignments.append((rmf_mol, ref_mol))
561  if self.dynamic0:
562  coord0 = [XYZ.get_coordinates()
563  for XYZ in self.mol_XYZs0[rmf_mol]]
564  else:
565  coord0 = self.molcoords0[rmf_mol]
566 
567  if self.dynamic1:
568  coord1 = [XYZ.get_coordinates()
569  for XYZ in self.mol_XYZs1[ref_mol]]
570  else:
571  coord1 = self.molcoords1[ref_mol]
572  rmsd_pair = self.metric(coord1, coord0)
573  N = len(self.molcoords1[ref_mol])
574  total_N += N
575  total_rmsd += rmsd_pair*rmsd_pair*N
576  rmsd_dict[ref_mol] = rmsd_pair
577  total_rmsd = sqrt(total_rmsd/total_N)
578  return total_rmsd, best_assignments
579 
580  def get_output(self):
581  """Returns output for IMP.pmi.output.Output object"""
582  total_rmsd, best_assignments = self.get_rmsd_and_assigments()
583 
584  assignments_out = []
585  for rmf_mol, ref_mol in best_assignments:
586  ref_name = ref_mol.get_name()
587  ref_copy = IMP.atom.Copy(ref_mol).get_copy_index()
588  rmf_name = rmf_mol.get_name()
589  rmf_copy = IMP.atom.Copy(rmf_mol).get_copy_index()
590  assignments_out.append(rmf_name + "." + str(rmf_copy) + "->" +
591  ref_name + "." + str(ref_copy))
592  return {"RMSD_" + self.label: str(total_rmsd),
593  "RMSD_assignments_" + self.label: str(assignments_out)}
594 
595 
596 class Precision(object):
597  """A class to evaluate the precision of an ensemble.
598 
599  Also can evaluate the cross-precision of multiple ensembles.
600  Supports MPI for coordinate reading.
601  Recommended procedure:
602  -# initialize object and pass the selection for evaluating precision
603  -# call add_structures() to read in the data (specify group name)
604  -# call get_precision() to evaluate inter/intra precision
605  -# call get_rmsf() to evaluate within-group fluctuations
606  """
607  def __init__(self, model, resolution=1, selection_dictionary={}):
608  """Constructor.
609  @param model The IMP Model
610  @param resolution Use 1 or 10 (kluge: requires that "_Res:X" is
611  part of the hier name)
612  @param selection_dictionary Dictionary where keys are names for
613  selections and values are selection tuples for scoring
614  precision. "All" is automatically made as well
615  """
616  try:
617  from mpi4py import MPI
618  self.comm = MPI.COMM_WORLD
619  self.rank = self.comm.Get_rank()
620  self.number_of_processes = self.comm.size
621  except ImportError:
622  self.number_of_processes = 1
623  self.rank = 0
624 
625  self.styles = ['pairwise_rmsd', 'pairwise_drmsd_k', 'pairwise_drmsd_Q',
626  'pairwise_drms_k', 'pairwise_rmsd', 'drmsd_from_center']
627  self.style = 'pairwise_drmsd_k'
628  self.structures_dictionary = {}
629  self.reference_structures_dictionary = {}
630  self.prots = []
631  self.protein_names = None
632  self.len_particles_resolution_one = None
633  self.model = model
634  self.rmf_names_frames = {}
635  self.reference_rmf_names_frames = None
636  self.reference_structure = None
637  self.reference_prot = None
638  self.selection_dictionary = selection_dictionary
639  self.threshold = 40.0
640  self.residue_particle_index_map = None
641  self.prots = None
642  if resolution in [1, 10]:
643  self.resolution = resolution
644  else:
645  raise KeyError("Currently only allow resolution 1 or 10")
646 
647  def _get_structure(self, rmf_frame_index, rmf_name):
648  """Read an RMF file and return the particles"""
649  rh = RMF.open_rmf_file_read_only(rmf_name)
650  if not self.prots:
651  print("getting coordinates for frame %i rmf file %s"
652  % (rmf_frame_index, rmf_name))
653  self.prots = IMP.rmf.create_hierarchies(rh, self.model)
654  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
655  else:
656  print("linking coordinates for frame %i rmf file %s"
657  % (rmf_frame_index, rmf_name))
658  IMP.rmf.link_hierarchies(rh, self.prots)
659  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
660  del rh
661 
662  if self.resolution == 1:
663  particle_dict = get_particles_at_resolution_one(self.prots[0])
664  elif self.resolution == 10:
665  particle_dict = get_particles_at_resolution_ten(self.prots[0])
666 
667  protein_names = list(particle_dict.keys())
668  particles_resolution_one = []
669  for k in particle_dict:
670  particles_resolution_one += (particle_dict[k])
671 
672  if self.protein_names is None:
673  self.protein_names = protein_names
674  else:
675  if self.protein_names != protein_names:
676  print("Error: the protein names of the new coordinate set "
677  "is not compatible with the previous one")
678 
679  if self.len_particles_resolution_one is None:
680  self.len_particles_resolution_one = len(particles_resolution_one)
681  else:
682  if self.len_particles_resolution_one != \
683  len(particles_resolution_one):
684  raise ValueError("the new coordinate set is not compatible "
685  "with the previous one")
686 
687  return particles_resolution_one
688 
689  def add_structure(self,
690  rmf_name,
691  rmf_frame_index,
692  structure_set_name,
693  setup_index_map=False):
694  """ Read a structure into the ensemble and store (as coordinates).
695  @param rmf_name The name of the RMF file
696  @param rmf_frame_index The frame to read
697  @param structure_set_name Name for the set that includes this structure
698  (e.g. "cluster 1")
699  @param setup_index_map if requested, set up a dictionary to help
700  find residue indexes
701  """
702 
703  # decide where to put this structure
704  if structure_set_name in self.structures_dictionary:
705  cdict = self.structures_dictionary[structure_set_name]
706  rmflist = self.rmf_names_frames[structure_set_name]
707  else:
708  self.structures_dictionary[structure_set_name] = {}
709  self.rmf_names_frames[structure_set_name] = []
710  cdict = self.structures_dictionary[structure_set_name]
711  rmflist = self.rmf_names_frames[structure_set_name]
712 
713  # read the particles
714  try:
715  particles_resolution_one = self._get_structure(
716  rmf_frame_index, rmf_name)
717  except ValueError:
718  print("something wrong with the rmf")
719  return 0
720  self.selection_dictionary.update({"All": self.protein_names})
721 
722  for selection_name in self.selection_dictionary:
723  selection_tuple = self.selection_dictionary[selection_name]
724  coords = self._select_coordinates(
725  selection_tuple, particles_resolution_one, self.prots[0])
726  if selection_name not in cdict:
727  cdict[selection_name] = [coords]
728  else:
729  cdict[selection_name].append(coords)
730 
731  rmflist.append((rmf_name, rmf_frame_index))
732 
733  # if requested, set up a dictionary to help find residue indexes
734  if setup_index_map:
735  self.residue_particle_index_map = {}
736  for prot_name in self.protein_names:
737  self.residue_particle_index_map[prot_name] = \
738  self._get_residue_particle_index_map(
739  prot_name, particles_resolution_one, self.prots[0])
740 
741  def add_structures(self,
742  rmf_name_frame_tuples,
743  structure_set_name):
744  """Read a list of RMFs, supports parallel
745  @param rmf_name_frame_tuples list of (rmf_file_name,frame_number)
746  @param structure_set_name Name this set of structures
747  (e.g. "cluster.1")
748  """
749 
750  # split up the requested list to read in parallel
751  my_rmf_name_frame_tuples = IMP.pmi.tools.chunk_list_into_segments(
752  rmf_name_frame_tuples, self.number_of_processes)[self.rank]
753  for nfr, tup in enumerate(my_rmf_name_frame_tuples):
754  rmf_name = tup[0]
755  rmf_frame_index = tup[1]
756  # the first frame stores the map between residues and particles
757  if self.residue_particle_index_map is None:
758  setup_index_map = True
759  else:
760  setup_index_map = False
761  self.add_structure(rmf_name,
762  rmf_frame_index,
763  structure_set_name,
764  setup_index_map)
765 
766  # synchronize the structures
767  if self.number_of_processes > 1:
768  self.rmf_names_frames = IMP.pmi.tools.scatter_and_gather(
769  self.rmf_names_frames)
770  if self.rank != 0:
771  self.comm.send(self.structures_dictionary, dest=0, tag=11)
772  elif self.rank == 0:
773  for i in range(1, self.number_of_processes):
774  data_tmp = self.comm.recv(source=i, tag=11)
775  for key in self.structures_dictionary:
776  self.structures_dictionary[key].update(data_tmp[key])
777  for i in range(1, self.number_of_processes):
778  self.comm.send(self.structures_dictionary, dest=i, tag=11)
779  if self.rank != 0:
780  self.structures_dictionary = self.comm.recv(source=0, tag=11)
781 
782  def _get_residue_particle_index_map(self, prot_name, structure, hier):
783  # Creates map from all particles to residue numbers
784  residue_particle_index_map = []
785  if IMP.pmi.get_is_canonical(hier):
786  s = IMP.atom.Selection(hier, molecules=[prot_name],
787  resolution=1)
788  else:
789  s = IMP.atom.Selection(hier, molecules=[prot_name])
790  all_selected_particles = s.get_selected_particles()
791  intersection = list(set(all_selected_particles) & set(structure))
792  sorted_intersection = IMP.pmi.tools.sort_by_residues(intersection)
793  for p in sorted_intersection:
794  residue_particle_index_map.append(
795  IMP.pmi.tools.get_residue_indexes(p))
796  return residue_particle_index_map
797 
798  def _select_coordinates(self, tuple_selections, structure, prot):
799  selected_coordinates = []
800  for t in tuple_selections:
801  if isinstance(t, tuple) and len(t) == 3:
802  if IMP.pmi.get_is_canonical(prot):
803  s = IMP.atom.Selection(
804  prot, molecules=[t[2]],
805  residue_indexes=range(t[0], t[1]+1), resolution=1)
806  else:
807  s = IMP.atom.Selection(
808  prot, molecules=[t[2]],
809  residue_indexes=range(t[0], t[1]+1))
810  all_selected_particles = s.get_selected_particles()
811  intersection = list(set(all_selected_particles)
812  & set(structure))
813  sorted_intersection = \
814  IMP.pmi.tools.sort_by_residues(intersection)
815  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
816  for p in sorted_intersection]
817  selected_coordinates += cc
818  elif isinstance(t, str):
819  if IMP.pmi.get_is_canonical(prot):
820  s = IMP.atom.Selection(prot, molecules=[t], resolution=1)
821  else:
822  s = IMP.atom.Selection(prot, molecules=[t])
823  all_selected_particles = s.get_selected_particles()
824  intersection = list(set(all_selected_particles)
825  & set(structure))
826  sorted_intersection = \
827  IMP.pmi.tools.sort_by_residues(intersection)
828  cc = [tuple(IMP.core.XYZ(p).get_coordinates())
829  for p in sorted_intersection]
830  selected_coordinates += cc
831  else:
832  raise ValueError("Selection error")
833  return selected_coordinates
834 
835  def set_threshold(self, threshold):
836  self.threshold = threshold
837 
838  def _get_distance(self, structure_set_name1, structure_set_name2,
839  selection_name, index1, index2):
840  """Compute distance between structures with various metrics"""
841  c1 = self.structures_dictionary[
842  structure_set_name1][selection_name][index1]
843  c2 = self.structures_dictionary[
844  structure_set_name2][selection_name][index2]
845  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
846  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
847 
848  if self.style == 'pairwise_drmsd_k':
849  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
850  if self.style == 'pairwise_drms_k':
851  distance = IMP.atom.get_drms(coordinates1, coordinates2)
852  if self.style == 'pairwise_drmsd_Q':
853  distance = IMP.atom.get_drmsd_Q(coordinates1, coordinates2,
854  self.threshold)
855 
856  if self.style == 'pairwise_rmsd':
857  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
858  return distance
859 
860  def _get_particle_distances(self, structure_set_name1, structure_set_name2,
861  selection_name, index1, index2):
862  c1 = self.structures_dictionary[
863  structure_set_name1][selection_name][index1]
864  c2 = self.structures_dictionary[
865  structure_set_name2][selection_name][index2]
866 
867  coordinates1 = [IMP.algebra.Vector3D(c) for c in c1]
868  coordinates2 = [IMP.algebra.Vector3D(c) for c in c2]
869 
870  distances = [np.linalg.norm(a-b)
871  for (a, b) in zip(coordinates1, coordinates2)]
872 
873  return distances
874 
875  def get_precision(self, structure_set_name1, structure_set_name2,
876  outfile=None, skip=1, selection_keywords=None):
877  """ Evaluate the precision of two named structure groups. Supports MPI.
878  When the structure_set_name1 is different from the structure_set_name2,
879  this evaluates the cross-precision (average pairwise distances).
880  @param outfile Name of the precision output file
881  @param structure_set_name1 string name of the first structure set
882  @param structure_set_name2 string name of the second structure set
883  @param skip analyze only every (skip)th structure for the distance
884  matrix calculation
885  @param selection_keywords Specify the selection name you want to
886  calculate on. By default this is computed for everything
887  you provided in the constructor, plus all the subunits together.
888  """
889  if selection_keywords is None:
890  sel_keys = list(self.selection_dictionary.keys())
891  else:
892  for k in selection_keywords:
893  if k not in self.selection_dictionary:
894  raise KeyError(
895  "you are trying to find named selection "
896  + k + " which was not requested in the constructor")
897  sel_keys = selection_keywords
898 
899  if outfile is not None:
900  of = open(outfile, "w")
901  centroid_index = 0
902  for selection_name in sel_keys:
903  number_of_structures_1 = len(self.structures_dictionary[
904  structure_set_name1][selection_name])
905  number_of_structures_2 = len(self.structures_dictionary[
906  structure_set_name2][selection_name])
907  distances = {}
908  structure_pointers_1 = list(range(0, number_of_structures_1, skip))
909  structure_pointers_2 = list(range(0, number_of_structures_2, skip))
910  pair_combination_list = list(
911  itertools.product(structure_pointers_1, structure_pointers_2))
912  if len(pair_combination_list) == 0:
913  raise ValueError("no structure selected. Check the "
914  "skip parameter.")
915 
916  # compute pairwise distances in parallel
917  my_pair_combination_list = IMP.pmi.tools.chunk_list_into_segments(
918  pair_combination_list, self.number_of_processes)[self.rank]
919  for n, pair in enumerate(my_pair_combination_list):
920  distances[pair] = self._get_distance(
921  structure_set_name1, structure_set_name2, selection_name,
922  pair[0], pair[1])
923  if self.number_of_processes > 1:
924  distances = IMP.pmi.tools.scatter_and_gather(distances)
925 
926  # Finally compute distance to centroid
927  if self.rank == 0:
928  if structure_set_name1 == structure_set_name2:
929  structure_pointers = structure_pointers_1
930  number_of_structures = number_of_structures_1
931 
932  # calculate the distance from the first centroid
933  # and determine the centroid
934  distance = 0.0
935  distances_to_structure = {}
936  distances_to_structure_normalization = {}
937 
938  for n in structure_pointers:
939  distances_to_structure[n] = 0.0
940  distances_to_structure_normalization[n] = 0
941 
942  for k in distances:
943  distance += distances[k]
944  distances_to_structure[k[0]] += distances[k]
945  distances_to_structure[k[1]] += distances[k]
946  distances_to_structure_normalization[k[0]] += 1
947  distances_to_structure_normalization[k[1]] += 1
948 
949  for n in structure_pointers:
950  distances_to_structure[n] = \
951  distances_to_structure[n] \
952  / distances_to_structure_normalization[n]
953 
954  min_distance = min([distances_to_structure[n]
955  for n in distances_to_structure])
956  centroid_index = [k for k, v in
957  distances_to_structure.items()
958  if v == min_distance][0]
959  centroid_rmf_name = \
960  self.rmf_names_frames[
961  structure_set_name1][centroid_index]
962 
963  centroid_distance = 0.0
964  distance_list = []
965  for n in range(number_of_structures):
966  dist = self._get_distance(
967  structure_set_name1, structure_set_name1,
968  selection_name, centroid_index, n)
969  centroid_distance += dist
970  distance_list.append(dist)
971 
972  centroid_distance /= number_of_structures
973  if outfile is not None:
974  of.write(str(selection_name) + " " +
975  structure_set_name1 +
976  " average centroid distance " +
977  str(centroid_distance) + "\n")
978  of.write(str(selection_name) + " " +
979  structure_set_name1 + " centroid index " +
980  str(centroid_index) + "\n")
981  of.write(str(selection_name) + " " +
982  structure_set_name1 + " centroid rmf name " +
983  str(centroid_rmf_name) + "\n")
984  of.write(str(selection_name) + " " +
985  structure_set_name1 +
986  " median centroid distance " +
987  str(np.median(distance_list)) + "\n")
988 
989  average_pairwise_distances = sum(
990  distances.values())/len(list(distances.values()))
991  if outfile is not None:
992  of.write(str(selection_name) + " " + structure_set_name1 +
993  " " + structure_set_name2 +
994  " average pairwise distance " +
995  str(average_pairwise_distances) + "\n")
996  if outfile is not None:
997  of.close()
998  return centroid_index
999 
1000  def get_rmsf(self, structure_set_name, outdir="./", skip=1,
1001  set_plot_yaxis_range=None):
1002  """ Calculate the residue mean square fluctuations (RMSF).
1003  Automatically outputs as data file and pdf
1004  @param structure_set_name Which structure set to calculate RMSF for
1005  @param outdir Where to write the files
1006  @param skip Skip this number of structures
1007  @param set_plot_yaxis_range Optional y-axis range for the plot
1008  """
1009  # get the centroid structure for the whole complex
1010  centroid_index = self.get_precision(structure_set_name,
1011  structure_set_name,
1012  outfile=None,
1013  skip=skip)
1014  if self.rank == 0:
1015  for sel_name in self.protein_names:
1016  self.selection_dictionary.update({sel_name: [sel_name]})
1017  try:
1018  number_of_structures = \
1019  len(self.structures_dictionary[
1020  structure_set_name][sel_name])
1021  except KeyError:
1022  # that protein was not included in the selection
1023  continue
1024  rpim = self.residue_particle_index_map[sel_name]
1025  outfile = outdir+"/rmsf."+sel_name+".dat"
1026  of = open(outfile, "w")
1027  residue_distances = {}
1028  residue_nblock = {}
1029  for index in range(number_of_structures):
1030  distances = self._get_particle_distances(
1031  structure_set_name, structure_set_name, sel_name,
1032  centroid_index, index)
1033  for nblock, block in enumerate(rpim):
1034  for residue_number in block:
1035  residue_nblock[residue_number] = nblock
1036  if residue_number not in residue_distances:
1037  residue_distances[residue_number] = \
1038  [distances[nblock]]
1039  else:
1040  residue_distances[
1041  residue_number].append(distances[nblock])
1042 
1043  residues = []
1044  rmsfs = []
1045  for rn in residue_distances:
1046  residues.append(rn)
1047  rmsf = np.std(residue_distances[rn])
1048  rmsfs.append(rmsf)
1049  of.write(str(rn) + " " + str(residue_nblock[rn])
1050  + " " + str(rmsf) + "\n")
1051 
1052  IMP.pmi.output.plot_xy_data(
1053  residues, rmsfs, title=sel_name,
1054  out_fn=outdir+"/rmsf."+sel_name, display=False,
1055  set_plot_yaxis_range=set_plot_yaxis_range,
1056  xlabel='Residue Number', ylabel='Standard error')
1057  of.close()
1058 
1059  def set_reference_structure(self, rmf_name, rmf_frame_index):
1060  """Read in a structure used for reference computation.
1061  Needed before calling get_average_distance_wrt_reference_structure()
1062  @param rmf_name The RMF file to read the reference
1063  @param rmf_frame_index The index in that file
1064  """
1065  particles_resolution_one = self._get_structure(rmf_frame_index,
1066  rmf_name)
1067  self.reference_rmf_names_frames = (rmf_name, rmf_frame_index)
1068 
1069  for selection_name in self.selection_dictionary:
1070  selection_tuple = self.selection_dictionary[selection_name]
1071  coords = self._select_coordinates(
1072  selection_tuple, particles_resolution_one, self.prots[0])
1073  self.reference_structures_dictionary[selection_name] = coords
1074 
1075  def get_average_distance_wrt_reference_structure_with_alignment(
1076  self, structure_set_name, alignment_selection_key):
1077  """First align then calculate RMSD
1078  @param structure_set_name: the name of the structure set
1079  @param alignment_selection_key: the key in self.selection_dictionary
1080  containing the selection tuples used to compute the alignment
1081  @return: for each structure in the structure set, returns the rmsd
1082  """
1083  ret = {}
1084  if self.reference_structures_dictionary == {}:
1085  print("Cannot compute until you set a reference structure")
1086  return
1087 
1088  align_reference_coordinates = \
1089  self.reference_structures_dictionary[alignment_selection_key]
1090  align_coordinates = self.structures_dictionary[
1091  structure_set_name][alignment_selection_key]
1092  transformations = []
1093  for c in align_coordinates:
1095  {"All": align_reference_coordinates}, {"All": c})
1096  transformation = Ali.align()[1]
1097  transformations.append(transformation)
1098  for selection_name in self.selection_dictionary:
1099  reference_coordinates = \
1100  self.reference_structures_dictionary[selection_name]
1101  coordinates2 = [IMP.algebra.Vector3D(c)
1102  for c in reference_coordinates]
1103  distances = []
1104  for n, sc in enumerate(self.structures_dictionary[
1105  structure_set_name][selection_name]):
1106  coordinates1 = [
1107  transformations[n].get_transformed(IMP.algebra.Vector3D(c))
1108  for c in sc]
1109  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1110  distances.append(distance)
1111  ret[selection_name] = {
1112  'all_distances': distances,
1113  'average_distance': sum(distances)/len(distances),
1114  'minimum_distance': min(distances)}
1115  return ret
1116 
1117  def _get_median(self, list_of_values):
1118  return np.median(np.array(list_of_values))
1119 
1120  def get_average_distance_wrt_reference_structure(self, structure_set_name):
1121  """Compare the structure set to the reference structure.
1122  @param structure_set_name The structure set to compute this on
1123  @note First call set_reference_structure()
1124  """
1125  ret = {}
1126  if self.reference_structures_dictionary == {}:
1127  print("Cannot compute until you set a reference structure")
1128  return
1129  for selection_name in self.selection_dictionary:
1130  reference_coordinates = self.reference_structures_dictionary[
1131  selection_name]
1132  coordinates2 = [IMP.algebra.Vector3D(c)
1133  for c in reference_coordinates]
1134  distances = []
1135  for sc in self.structures_dictionary[
1136  structure_set_name][selection_name]:
1137  coordinates1 = [IMP.algebra.Vector3D(c) for c in sc]
1138  if self.style == 'pairwise_drmsd_k':
1139  distance = IMP.atom.get_drmsd(coordinates1, coordinates2)
1140  if self.style == 'pairwise_drms_k':
1141  distance = IMP.atom.get_drms(coordinates1, coordinates2)
1142  if self.style == 'pairwise_drmsd_Q':
1143  distance = IMP.atom.get_drmsd_Q(
1144  coordinates1, coordinates2, self.threshold)
1145  if self.style == 'pairwise_rmsd':
1146  distance = IMP.algebra.get_rmsd(coordinates1, coordinates2)
1147  distances.append(distance)
1148 
1149  print(selection_name, "average distance",
1150  sum(distances)/len(distances), "minimum distance",
1151  min(distances), 'nframes', len(distances))
1152  ret[selection_name] = {
1153  'average_distance': sum(distances)/len(distances),
1154  'minimum_distance': min(distances)}
1155  return ret
1156 
1157  def get_coordinates(self):
1158  pass
1159 
1160  def set_precision_style(self, style):
1161  if style in self.styles:
1162  self.style = style
1163  else:
1164  raise ValueError("No such style")
1165 
1166 
1167 class GetModelDensity(object):
1168  """Compute mean density maps from structures.
1169 
1170  Keeps a dictionary of density maps,
1171  keys are in the custom ranges. When you call add_subunits_density, it adds
1172  particle coordinates to the existing density maps.
1173  """
1174 
1175  def __init__(self, custom_ranges, representation=None, resolution=20.0,
1176  voxel=5.0):
1177  """Constructor.
1178  @param custom_ranges Required. It's a dictionary, keys are the
1179  density component names, values are selection tuples
1180  e.g. {'kin28':[['kin28',1,-1]],
1181  'density_name_1' :[('ccl1')],
1182  'density_name_2' :[(1,142,'tfb3d1'),
1183  (143,700,'tfb3d2')]}
1184  @param representation PMI representation, for doing selections.
1185  Not needed if you only pass hierarchies
1186  @param resolution The MRC resolution of the output map (in
1187  Angstrom unit)
1188  @param voxel The voxel size for the output map (lower is slower)
1189  """
1190 
1191  self.representation = representation
1192  self.MRCresolution = resolution
1193  self.voxel = voxel
1194  self.densities = {}
1195  self.count_models = 0.0
1196  self.custom_ranges = custom_ranges
1197 
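    # Usage sketch (the density ranges and the list of hierarchies are
    # hypothetical):
    #
    #   dens = GetModelDensity({"Kin28": ["Kin28"],
    #                           "Tfb3_C": [(143, 700, "Tfb3")]},
    #                          resolution=20.0, voxel=5.0)
    #   for hier in good_scoring_model_hierarchies:
    #       dens.add_subunits_density(hier)
    #   dens.write_mrc(path="./densities")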
1198  def add_subunits_density(self, hierarchy=None):
1199  """Add a frame to the densities.
1200  @param hierarchy Optionally read the hierarchy from somewhere.
1201  If not passed, will just read the representation.
1202  """
1203  self.count_models += 1.0
1204 
1205  if hierarchy:
1206  part_dict = get_particles_at_resolution_one(hierarchy)
1207  all_particles_by_resolution = []
1208  for name in part_dict:
1209  all_particles_by_resolution += part_dict[name]
1210 
1211  for density_name in self.custom_ranges:
1212  parts = []
1213  if hierarchy:
1214  all_particles_by_segments = []
1215 
1216  for seg in self.custom_ranges[density_name]:
1217  if not hierarchy:
1218  # when you have a IMP.pmi.representation.Representation
1219  # class
1220  parts += IMP.pmi.tools.select_by_tuple(
1221  self.representation, seg, resolution=1,
1222  name_is_ambiguous=False)
1223  else:
1224  # else, when you have a hierarchy, but not a representation
1225  if not IMP.pmi.get_is_canonical(hierarchy):
1226  for h in hierarchy.get_children():
1227  if not IMP.atom.Molecule.get_is_setup(h):
1228  IMP.atom.Molecule.setup_particle(
1229  h.get_particle())
1230 
1231  if isinstance(seg, str):
1232  s = IMP.atom.Selection(hierarchy, molecule=seg)
1233  elif isinstance(seg, tuple) and len(seg) == 2:
1234  s = IMP.atom.Selection(
1235  hierarchy, molecule=seg[0], copy_index=seg[1])
1236  elif isinstance(seg, tuple) and len(seg) == 3:
1237  s = IMP.atom.Selection(
1238  hierarchy, molecule=seg[2],
1239  residue_indexes=range(seg[0], seg[1] + 1))
1240  elif isinstance(seg, tuple) and len(seg) == 4:
1241  s = IMP.atom.Selection(
1242  hierarchy, molecule=seg[2],
1243  residue_indexes=range(seg[0], seg[1] + 1),
1244  copy_index=seg[3])
1245  else:
1246  raise Exception('could not understand selection tuple '
1247  + str(seg))
1248 
1249  all_particles_by_segments += s.get_selected_particles()
1250  if hierarchy:
1251  if IMP.pmi.get_is_canonical(hierarchy):
1252  parts = all_particles_by_segments
1253  else:
1254  parts = list(set(all_particles_by_segments)
1255  & set(all_particles_by_resolution))
1256  self._create_density_from_particles(parts, density_name)
1257 
1258  def normalize_density(self):
1259  pass
1260 
1261  def _create_density_from_particles(self, ps, name,
1262  kernel_type='GAUSSIAN'):
1263  '''Internal function for adding to densities.
1264  pass XYZR particles with mass and create a density from them.
1265  kernel type options are GAUSSIAN, BINARIZED_SPHERE, and SPHERE.'''
1266  dmap = IMP.em.SampledDensityMap(ps, self.MRCresolution, self.voxel)
1267  dmap.calcRMS()
1268  dmap.set_was_used(True)
1269  if name not in self.densities:
1270  self.densities[name] = dmap
1271  else:
1272  bbox1 = IMP.em.get_bounding_box(self.densities[name])
1273  bbox2 = IMP.em.get_bounding_box(dmap)
1274  bbox1 += bbox2
1275  dmap3 = IMP.em.create_density_map(bbox1, self.voxel)
1276  dmap3.set_was_used(True)
1277  dmap3.add(dmap)
1278  dmap3.add(self.densities[name])
1279  self.densities[name] = dmap3
1280 
1281  def get_density_keys(self):
1282  return list(self.densities.keys())
1283 
1284  def get_density(self, name):
1285  """Get the current density for some component name"""
1286  if name not in self.densities:
1287  return None
1288  else:
1289  return self.densities[name]
1290 
1291  def write_mrc(self, path="./", suffix=None):
1292  import os
1293  import errno
1294  for density_name in self.densities:
1295  self.densities[density_name].multiply(1. / self.count_models)
1296  if suffix is None:
1297  name = path + "/" + density_name + ".mrc"
1298  else:
1299  name = path + "/" + density_name + "." + suffix + ".mrc"
1300  path, file = os.path.split(name)
1301  if not os.path.exists(path):
1302  try:
1303  os.makedirs(path)
1304  except OSError as e:
1305  if e.errno != errno.EEXIST:
1306  raise
1307  IMP.em.write_map(self.densities[density_name], name,
1308  IMP.em.MRCReaderWriter())
1309 
1310 
1311 class GetContactMap(object):
1312 
1313  def __init__(self, distance=15.):
1314  self.distance = distance
1315  self.contactmap = ''
1316  self.namelist = []
1317  self.xlinks = 0
1318  self.XL = {}
1319  self.expanded = {}
1320  self.resmap = {}
1321 
1322  def set_prot(self, prot):
1323  self.prot = prot
1324  self.protnames = []
1325 
1326  particles_dictionary = get_particles_at_resolution_one(self.prot)
1327 
1328  for name in particles_dictionary:
1329  residue_indexes = []
1330  for p in particles_dictionary[name]:
1331  print(p.get_name())
1332  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1333 
1334  if len(residue_indexes) != 0:
1335  self.protnames.append(name)
1336 
1337  def get_subunit_coords(self, frame, align=0):
1338  from scipy.spatial.distance import cdist
1339  coords = []
1340  radii = []
1341  namelist = []
1342  for part in self.prot.get_children():
1343  SortedSegments = []
1344  print(part)
1345  for chl in part.get_children():
1346  start = IMP.atom.get_leaves(chl)[0]
1347 
1348  startres = IMP.atom.Fragment(start).get_residue_indexes()[0]
1349  SortedSegments.append((chl, startres))
1350  SortedSegments = sorted(SortedSegments, key=itemgetter(1))
1351 
1352  for sgmnt in SortedSegments:
1353  for leaf in IMP.atom.get_leaves(sgmnt[0]):
1354  p = IMP.core.XYZR(leaf)
1355  crd = np.array([p.get_x(), p.get_y(), p.get_z()])
1356 
1357  coords.append(crd)
1358  radii.append(p.get_radius())
1359 
1360  new_name = part.get_name() + '_' + sgmnt[0].get_name() + \
1361  '_' + \
1362  str(IMP.atom.Fragment(leaf)
1363  .get_residue_indexes()[0])
1364  namelist.append(new_name)
1365  self.expanded[new_name] = len(
1366  IMP.atom.Fragment(leaf).get_residue_indexes())
1367  if part.get_name() not in self.resmap:
1368  self.resmap[part.get_name()] = {}
1369  for res in IMP.atom.Fragment(leaf).get_residue_indexes():
1370  self.resmap[part.get_name()][res] = new_name
1371 
1372  coords = np.array(coords)
1373  radii = np.array(radii)
1374  if len(self.namelist) == 0:
1375  self.namelist = namelist
1376  self.contactmap = np.zeros((len(coords), len(coords)))
1377  distances = cdist(coords, coords)
1378  distances = (distances - radii).T - radii
1379  distances = distances <= self.distance
1380  self.contactmap += distances
1381 
1382  def add_xlinks(self, filname,
1383  identification_string='ISDCrossLinkMS_Distance_'):
1384  self.xlinks = 1
1385  with open(filname) as data:
1386  D = data.readlines()
1387 
1388  for d in D:
1389  if identification_string in d:
1390  d = d.replace("_", " ").replace("-", " ").replace(
1391  ":", " ").split()
1392 
1393  t1, t2 = (d[0], d[1]), (d[1], d[0])
1394  if t1 not in self.XL:
1395  self.XL[t1] = [(int(d[2]) + 1, int(d[3]) + 1)]
1396  self.XL[t2] = [(int(d[3]) + 1, int(d[2]) + 1)]
1397  else:
1398  self.XL[t1].append((int(d[2]) + 1, int(d[3]) + 1))
1399  self.XL[t2].append((int(d[3]) + 1, int(d[2]) + 1))
1400 
1401  def dist_matrix(self, skip_cmap=0, skip_xl=1, outname=None):
1402  K = self.namelist
1403  M = self.contactmap
1404  C, R = [], []
1405  proteins = self.protnames
1406 
1407  # build per-protein-pair contact matrices
1408  if skip_cmap == 0:
1409  Matrices = {}
1410  proteins = [p.get_name() for p in self.prot.get_children()]
1411  missing = []
1412  for p1 in range(len(proteins)):
1413  for p2 in range(p1, len(proteins)):
1414  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1415  max(self.resmap[proteins[p2]].keys()))
1416  pn1, pn2 = proteins[p1], proteins[p2]
1417  mtr = np.zeros((pl1 + 1, pl2 + 1))
1418  print('Creating matrix for: ',
1419  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1420  for i1 in range(1, pl1 + 1):
1421  for i2 in range(1, pl2 + 1):
1422  try:
1423  r1 = K.index(self.resmap[pn1][i1])
1424  r2 = K.index(self.resmap[pn2][i2])
1425  r = M[r1, r2]
1426  mtr[i1 - 1, i2 - 1] = r
1427  except KeyError:
1428  missing.append((pn1, pn2, i1, i2))
1429  Matrices[(pn1, pn2)] = mtr
1430 
1431  # add cross-links
1432  if skip_xl == 0:
1433  if self.XL == {}:
1434  raise ValueError("cross-links were not provided, use "
1435  "add_xlinks function!")
1436  Matrices_xl = {}
1437  for p1 in range(len(proteins)):
1438  for p2 in range(p1, len(proteins)):
1439  pl1, pl2 = (max(self.resmap[proteins[p1]].keys()),
1440  max(self.resmap[proteins[p2]].keys()))
1441  pn1, pn2 = proteins[p1], proteins[p2]
1442  mtr = np.zeros((pl1 + 1, pl2 + 1))
1443  flg = 0
1444  try:
1445  xls = self.XL[(pn1, pn2)]
1446  except KeyError:
1447  try:
1448  xls = self.XL[(pn2, pn1)]
1449  flg = 1
1450  except KeyError:
1451  flg = 2
1452  if flg == 0:
1453  print('Creating matrix for: ',
1454  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1455  for xl1, xl2 in xls:
1456  if xl1 > pl1:
1457  print('X' * 10, xl1, xl2)
1458  xl1 = pl1
1459  if xl2 > pl2:
1460  print('X' * 10, xl1, xl2)
1461  xl2 = pl2
1462  mtr[xl1 - 1, xl2 - 1] = 100
1463  elif flg == 1:
1464  print('Creating matrix for: ',
1465  p1, p2, pn1, pn2, mtr.shape, pl1, pl2)
1466  for xl1, xl2 in xls:
1467  if xl1 > pl1:
1468  print('X' * 10, xl1, xl2)
1469  xl1 = pl1
1470  if xl2 > pl2:
1471  print('X' * 10, xl1, xl2)
1472  xl2 = pl2
1473  mtr[xl2 - 1, xl1 - 1] = 100
1474  else:
1475  print('No cross-links between: ', pn1, pn2)
1476  Matrices_xl[(pn1, pn2)] = mtr
1477 
1478  # make list of protein names and create coordinate lists
1479  C = proteins
1480  # W is the component length list,
1481  # R is the contiguous coordinates list
1482  W, R = [], []
1483  for i, c in enumerate(C):
1484  cl = max(self.resmap[c].keys())
1485  W.append(cl)
1486  if i == 0:
1487  R.append(cl)
1488  else:
1489  R.append(R[-1] + cl)
1490 
1491  # start plotting
1492  if outname:
1493  # Don't require a display
1494  import matplotlib as mpl
1495  mpl.use('Agg')
1496  import matplotlib.pyplot as plt
1497  import matplotlib.gridspec as gridspec
1498  import scipy.sparse as sparse
1499 
1500  plt.figure()
1501  gs = gridspec.GridSpec(len(W), len(W),
1502  width_ratios=W,
1503  height_ratios=W)
1504 
1505  cnt = 0
1506  for x1, r1 in enumerate(R):
1507  for x2, r2 in enumerate(R):
1508  ax = plt.subplot(gs[cnt])
1509  if skip_cmap == 0:
1510  try:
1511  mtr = Matrices[(C[x1], C[x2])]
1512  except KeyError:
1513  mtr = Matrices[(C[x2], C[x1])].T
1514  ax.imshow(
1515  np.log(mtr),
1516  interpolation='nearest',
1517  vmin=0.,
1518  vmax=log(mtr.max()))
1519  ax.set_xticks([])
1520  ax.set_yticks([])
1521  if skip_xl == 0:
1522  try:
1523  mtr = Matrices_xl[(C[x1], C[x2])]
1524  except KeyError:
1525  mtr = Matrices_xl[(C[x2], C[x1])].T
1526  ax.spy(
1527  sparse.csr_matrix(mtr),
1528  markersize=10,
1529  color='white',
1530  linewidth=100,
1531  alpha=0.5)
1532  ax.set_xticks([])
1533  ax.set_yticks([])
1534 
1535  cnt += 1
1536  if x2 == 0:
1537  ax.set_ylabel(C[x1], rotation=90)
1538  if outname:
1539  plt.savefig(outname + ".pdf", dpi=300, transparent=False)
1540  else:
1541  plt.show()
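    # Workflow sketch (the hierarchy, number of frames and file names are
    # hypothetical):
    #
    #   cmap = GetContactMap(distance=15.)
    #   cmap.set_prot(prot)                    # resolution-1 hierarchy
    #   for frame in range(nframes):           # after loading each frame
    #       cmap.get_subunit_coords(frame)
    #   cmap.add_xlinks("stat_file.out")       # optional cross-link file
    #   cmap.dist_matrix(skip_cmap=0, skip_xl=0, outname="contact_map")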
1542 
1543 
1544 # ------------------------------------------------------------------
1545 # a few random tools
1546 
1547 def get_hiers_from_rmf(model, frame_number, rmf_file):
1548  # I have to deprecate this function
1549  print("getting coordinates for frame %i rmf file %s"
1550  % (frame_number, rmf_file))
1551 
1552  # load the frame
1553  rh = RMF.open_rmf_file_read_only(rmf_file)
1554 
1555  try:
1556  prots = IMP.rmf.create_hierarchies(rh, model)
1557  except IOError:
1558  print("Unable to open rmf file %s" % (rmf_file))
1559  return None
1560 
1561  try:
1562  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1563  except IOError:
1564  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1565  return None
1566  model.update()
1567  del rh
1568 
1569  return prots
1570 
1571 
1572 def link_hiers_to_rmf(model, hiers, frame_number, rmf_file):
1573  print("linking hierarchies for frame %i rmf file %s"
1574  % (frame_number, rmf_file))
1575  rh = RMF.open_rmf_file_read_only(rmf_file)
1576  IMP.rmf.link_hierarchies(rh, hiers)
1577  try:
1578  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1579  except: # noqa: E722
1580  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1581  return False
1582  model.update()
1583  del rh
1584  return True
1585 
1586 
1587 def get_hiers_and_restraints_from_rmf(model, frame_number, rmf_file):
1588  # I have to deprecate this function
1589  print("getting coordinates for frame %i rmf file %s"
1590  % (frame_number, rmf_file))
1591 
1592  # load the frame
1593  rh = RMF.open_rmf_file_read_only(rmf_file)
1594 
1595  try:
1596  prots = IMP.rmf.create_hierarchies(rh, model)
1597  rs = IMP.rmf.create_restraints(rh, model)
1598  except IOError:
1599  print("Unable to open rmf file %s" % (rmf_file))
1600  return None, None
1601  try:
1602  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1603  except: # noqa: E722
1604  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1605  return None, None
1606  model.update()
1607  del rh
1608 
1609  return prots, rs
1610 
1611 
1612 def link_hiers_and_restraints_to_rmf(model, hiers, rs, frame_number, rmf_file):
1613  print("linking hierarchies for frame %i rmf file %s"
1614  % (frame_number, rmf_file))
1615  rh = RMF.open_rmf_file_read_only(rmf_file)
1616  IMP.rmf.link_hierarchies(rh, hiers)
1617  IMP.rmf.link_restraints(rh, rs)
1618  try:
1619  IMP.rmf.load_frame(rh, RMF.FrameID(frame_number))
1620  except: # noqa: E722
1621  print("Unable to open frame %i of file %s" % (frame_number, rmf_file))
1622  return False
1623  model.update()
1624  del rh
1625  return True
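# A common pattern for iterating over RMF frames with the helpers above
# (the file name and frame count are hypothetical):
#
#   prots, rs = get_hiers_and_restraints_from_rmf(model, 0, "output.rmf3")
#   for frame in range(1, nframes):
#       if link_hiers_and_restraints_to_rmf(model, prots, rs, frame,
#                                           "output.rmf3"):
#           pass  # analyze the loaded frame here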
1626 
1627 
1629  """Get particles at res 1, or any beads, based on the name.
1630  No Representation is needed. This is mainly used when the hierarchy
1631  is read from an RMF file.
1632 
1633  @return a dictionary of component names and their particles
1634  @note If the root node is named "System" or is a "State", do
1635  proper selection.
1636  """
1637  particle_dict = {}
1638 
1639  # attempt to give good results for PMI2
1640  if IMP.pmi.get_is_canonical(prot):
1641  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1642  sel = IMP.atom.Selection(mol, resolution=1)
1643  particle_dict[mol.get_name()] = sel.get_selected_particles()
1644  else:
1645  allparticles = []
1646  for c in prot.get_children():
1647  name = c.get_name()
1648  particle_dict[name] = IMP.atom.get_leaves(c)
1649  for s in c.get_children():
1650  if "_Res:1" in s.get_name() and "_Res:10" not in s.get_name():
1651  allparticles += IMP.atom.get_leaves(s)
1652  if "Beads" in s.get_name():
1653  allparticles += IMP.atom.get_leaves(s)
1654 
1655  for name in particle_dict:
1656  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1657  list(set(particle_dict[name]) & set(allparticles)))
1658  return particle_dict
1659 
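# Illustrative usage sketch (not part of the original module): build the
# per-component particle dictionary for a hierarchy read from an RMF file
# and report the number of selected particles per component. The RMF file
# name is a placeholder.
def _example_particles_per_component(rmf_file="output.rmf3"):
    model = IMP.Model()
    prots = get_hiers_from_rmf(model, 0, rmf_file)
    if prots is None:
        return
    particle_dict = get_particles_at_resolution_one(prots[0])
    for name, particles in particle_dict.items():
        print(name, len(particles))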
1660 
1662  """Get particles at res 10, or any beads, based on the name.
1663  No Representation is needed.
1664  This is mainly used when the hierarchy is read from an RMF file.
1665 
1666  @return a dictionary of component names and their particles
1667  @note If the root node is named "System" or is a "State", do proper
1668  selection.
1669  """
1670  particle_dict = {}
1671  # attempt to give good results for PMI2
1672  if IMP.pmi.get_is_canonical(prot):
1673  for mol in IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE):
1674  sel = IMP.atom.Selection(mol, resolution=10)
1675  particle_dict[mol.get_name()] = sel.get_selected_particles()
1676  else:
1677  allparticles = []
1678  for c in prot.get_children():
1679  name = c.get_name()
1680  particle_dict[name] = IMP.atom.get_leaves(c)
1681  for s in c.get_children():
1682  if "_Res:10" in s.get_name():
1683  allparticles += IMP.atom.get_leaves(s)
1684  if "Beads" in s.get_name():
1685  allparticles += IMP.atom.get_leaves(s)
1686  for name in particle_dict:
1687  particle_dict[name] = IMP.pmi.tools.sort_by_residues(
1688  list(set(particle_dict[name]) & set(allparticles)))
1689  return particle_dict
1690 
1691 
1692 class CrossLinkTable(object):
1693  """Visualization of cross-links"""
1694  def __init__(self):
1695  self.crosslinks = []
1696  self.external_csv_data = None
1697  self.crosslinkedprots = set()
1698  self.mindist = +10000000.0
1699  self.maxdist = -10000000.0
1700  self.contactmap = None
1701 
1702  def set_hierarchy(self, prot):
1703  self.prot_length_dict = {}
1704  self.model = prot.get_model()
1705 
1706  for i in prot.get_children():
1707  name = i.get_name()
1708  residue_indexes = []
1709  for p in IMP.atom.get_leaves(i):
1710  residue_indexes.extend(IMP.pmi.tools.get_residue_indexes(p))
1711 
1712  if len(residue_indexes) != 0:
1713  self.prot_length_dict[name] = max(residue_indexes)
1714 
1715  def set_coordinates_for_contact_map(self, rmf_name, rmf_frame_index):
1716  from scipy.spatial.distance import cdist
1717 
1718  rh = RMF.open_rmf_file_read_only(rmf_name)
1719  prots = IMP.rmf.create_hierarchies(rh, self.model)
1720  IMP.rmf.load_frame(rh, RMF.FrameID(rmf_frame_index))
1721  print("getting coordinates for frame %i rmf file %s"
1722  % (rmf_frame_index, rmf_name))
1723  del rh
1724 
1725  coords = []
1726  radii = []
1727 
1728  particles_dictionary = get_particles_at_resolution_one(prots[0])
1729 
1730  resindex = 0
1731  self.index_dictionary = {}
1732 
1733  for name in particles_dictionary:
1734  residue_indexes = []
1735  for p in particles_dictionary[name]:
1736  print(p.get_name())
1737  residue_indexes = IMP.pmi.tools.get_residue_indexes(p)
1738 
1739  if len(residue_indexes) != 0:
1740 
1741  for res in range(min(residue_indexes),
1742  max(residue_indexes) + 1):
1743  d = IMP.core.XYZR(p)
1744 
1745  crd = np.array([d.get_x(), d.get_y(), d.get_z()])
1746  coords.append(crd)
1747  radii.append(d.get_radius())
1748  if name not in self.index_dictionary:
1749  self.index_dictionary[name] = [resindex]
1750  else:
1751  self.index_dictionary[name].append(resindex)
1752  resindex += 1
1753 
1754  coords = np.array(coords)
1755  radii = np.array(radii)
1756 
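  # convert the center-to-center distances to approximate
  # surface-to-surface distances by subtracting both radii, flag pairs
  # within 20 A as contacts (1.0), and accumulate them into the contact
  # map, summing over all loaded frames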
1757  distances = cdist(coords, coords)
1758  distances = (distances - radii).T - radii
1759 
1760  distances = np.where(distances <= 20.0, 1.0, 0)
1761  if self.contactmap is None:
1762  self.contactmap = np.zeros((len(coords), len(coords)))
1763  self.contactmap += distances
1764 
1765  for prot in prots:
1766  IMP.atom.destroy(prot)
1767 
1768  def set_crosslinks(
1769  self, data_file, search_label='ISDCrossLinkMS_Distance_',
1770  mapping=None, filter_label=None,
1771  # provide a list of rmf base names to filter the stat file
1772  filter_rmf_file_names=None, external_csv_data_file=None,
1773  external_csv_data_file_unique_id_key="Unique ID"):
1774 
1775  # example key ISDCrossLinkMS_Distance_intrarb_937-State:0-108:RPS3_55:RPS30-1-1-0.1_None # noqa: E501
1776  # mapping is a dictionary that maps standard keywords to entry
1777  # positions in the key string
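 # e.g. (illustrative only) a mapping that reproduces the default parsing
 # used below when mapping is None:
 # mapping = {"Residue1": 5, "Protein1": 6, "Residue2": 7, "Protein2": 8,
 #            "Confidence": 12, "Unique Identifier": 3}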
1778  # filter_label, if given, keeps only the keys that contain that label
1779  # external_csv_data_file is a CSV file that contains further
1780  # information on the cross-links;
1781  # its unique id column is used to create the dictionary keys
1782 
1783  po = IMP.pmi.output.ProcessOutput(data_file)
1784  keys = po.get_keys()
1785 
1786  xl_keys = [k for k in keys if search_label in k]
1787 
1788  if filter_rmf_file_names is not None:
1789  rmf_file_key = "local_rmf_file_name"
1790  fs = po.get_fields(xl_keys+[rmf_file_key])
1791  else:
1792  fs = po.get_fields(xl_keys)
1793 
1794  # this dictionary stores the occurrence of a given cross-link
1795  self.cross_link_frequency = {}
1796 
1797  # this dictionary stores the series of distances for given cross-linked
1798  # residues
1799  self.cross_link_distances = {}
1800 
1801  # this dictionary stores the series of distances for each unique
1802  # pair of cross-linked residues
1803  self.cross_link_distances_unique = {}
1804 
1805  if external_csv_data_file is not None:
1806  # this dictionary stores the further information on cross-links
1807  # labeled by unique ID
1808  self.external_csv_data = {}
1809  xldb = IMP.pmi.tools.get_db_from_csv(external_csv_data_file)
1810 
1811  for xl in xldb:
1812  self.external_csv_data[
1813  xl[external_csv_data_file_unique_id_key]] = xl
1814 
1815  # this list keeps track of the cross-link tuples per sample,
1816  # so that the same cross-linked residues are not counted twice
1817  # within the same sample
1818  cross_link_frequency_list = []
1819 
1820  self.unique_cross_link_list = []
1821 
1822  for key in xl_keys:
1823  print(key)
1824  keysplit = key.replace("_", " ").replace("-", " ").replace(
1825  ":", " ").split()
1826 
1827  if filter_label is not None:
1828  if filter_label not in keysplit:
1829  continue
1830 
1831  if mapping is None:
1832  r1 = int(keysplit[5])
1833  c1 = keysplit[6]
1834  r2 = int(keysplit[7])
1835  c2 = keysplit[8]
1836  try:
1837  confidence = keysplit[12]
1838  except: # noqa: E722
1839  confidence = '0.0'
1840  try:
1841  unique_identifier = keysplit[3]
1842  except: # noqa: E722
1843  unique_identifier = '0'
1844  else:
1845  r1 = int(keysplit[mapping["Residue1"]])
1846  c1 = keysplit[mapping["Protein1"]]
1847  r2 = int(keysplit[mapping["Residue2"]])
1848  c2 = keysplit[mapping["Protein2"]]
1849  try:
1850  confidence = keysplit[mapping["Confidence"]]
1851  except: # noqa: E722
1852  confidence = '0.0'
1853  try:
1854  unique_identifier = keysplit[mapping["Unique Identifier"]]
1855  except: # noqa: E722
1856  unique_identifier = '0'
1857 
1858  self.crosslinkedprots.add(c1)
1859  self.crosslinkedprots.add(c2)
1860 
1861  # construct the list of distances corresponding to the input rmf
1862  # files
1863 
1864  dists = []
1865  if filter_rmf_file_names is not None:
1866  for n, d in enumerate(fs[key]):
1867  if fs[rmf_file_key][n] in filter_rmf_file_names:
1868  dists.append(float(d))
1869  else:
1870  dists = [float(f) for f in fs[key]]
1871 
1872  # compute the median distance of this cross-link over the selected
1873  # frames and keep track of the global minimum and maximum
1874 
1875  mdist = self.median(dists)
1876 
1877  stdv = np.std(np.array(dists))
1878  if self.mindist > mdist:
1879  self.mindist = mdist
1880  if self.maxdist < mdist:
1881  self.maxdist = mdist
1882 
1883  # calculate the frequency of unique cross-links within the same
1884  # sample
1885  if (r1, c1, r2, c2, mdist) not in cross_link_frequency_list:
1886  if (r1, c1, r2, c2) not in self.cross_link_frequency:
1887  self.cross_link_frequency[(r1, c1, r2, c2)] = 1
1888  self.cross_link_frequency[(r2, c2, r1, c1)] = 1
1889  else:
1890  self.cross_link_frequency[(r2, c2, r1, c1)] += 1
1891  self.cross_link_frequency[(r1, c1, r2, c2)] += 1
1892  cross_link_frequency_list.append((r1, c1, r2, c2))
1893  cross_link_frequency_list.append((r2, c2, r1, c1))
1894  self.unique_cross_link_list.append(
1895  (r1, c1, r2, c2, mdist))
1896 
1897  if (r1, c1, r2, c2) not in self.cross_link_distances:
1898  self.cross_link_distances[(
1899  r1, c1, r2, c2, mdist, confidence)] = dists
1900  self.cross_link_distances[(
1901  r2, c2, r1, c1, mdist, confidence)] = dists
1902  self.cross_link_distances_unique[(r1, c1, r2, c2)] = dists
1903  else:
1904  self.cross_link_distances[(
1905  r2, c2, r1, c1, mdist, confidence)] += dists
1906  self.cross_link_distances[(
1907  r1, c1, r2, c2, mdist, confidence)] += dists
1908 
1909  self.crosslinks.append(
1910  (r1, c1, r2, c2, mdist, stdv, confidence, unique_identifier,
1911  'original'))
1912  self.crosslinks.append(
1913  (r2, c2, r1, c1, mdist, stdv, confidence, unique_identifier,
1914  'reversed'))
1915 
1916  self.cross_link_frequency_inverted = {}
1917  for xl in self.unique_cross_link_list:
1918  (r1, c1, r2, c2, mdist) = xl
1919  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
1920  if frequency not in self.cross_link_frequency_inverted:
1921  self.cross_link_frequency_inverted[
1922  frequency] = [(r1, c1, r2, c2)]
1923  else:
1924  self.cross_link_frequency_inverted[
1925  frequency].append((r1, c1, r2, c2))
1926 
1927  # -------------
1928 
1929  def median(self, mylist):
1930  sorts = sorted(mylist)
1931  length = len(sorts)
1932  print(length)
1933  if length == 1:
1934  return mylist[0]
1935  if not length % 2:
1936  return (sorts[length // 2] + sorts[length // 2 - 1]) / 2.0
1937  return sorts[length // 2]
1938 
1939  def set_threshold(self, threshold):
1940  self.threshold = threshold
1941 
1942  def set_tolerance(self, tolerance):
1943  self.tolerance = tolerance
1944 
1945  def colormap(self, dist):
1946  if dist < self.threshold - self.tolerance:
1947  return "Green"
1948  elif dist >= self.threshold + self.tolerance:
1949  return "Orange"
1950  else:
1951  return "Red"
1952 
1953  def write_cross_link_database(self, filename, format='csv'):
1954  import csv
1955 
1956  fieldnames = [
1957  "Unique ID", "Protein1", "Residue1", "Protein2", "Residue2",
1958  "Median Distance", "Standard Deviation", "Confidence",
1959  "Frequency", "Arrangement"]
1960 
1961  if self.external_csv_data is not None:
1962  keys = list(self.external_csv_data.keys())
1963  innerkeys = list(self.external_csv_data[keys[0]].keys())
1964  innerkeys.sort()
1965  fieldnames += innerkeys
1966 
1967  dw = csv.DictWriter(
1968  open(filename, "w"),
1969  delimiter=',',
1970  fieldnames=fieldnames)
1971  dw.writeheader()
1972  for xl in self.crosslinks:
1973  (r1, c1, r2, c2, mdist, stdv, confidence,
1974  unique_identifier, descriptor) = xl
1975  if descriptor == 'original':
1976  outdict = {}
1977  outdict["Unique ID"] = unique_identifier
1978  outdict["Protein1"] = c1
1979  outdict["Protein2"] = c2
1980  outdict["Residue1"] = r1
1981  outdict["Residue2"] = r2
1982  outdict["Median Distance"] = mdist
1983  outdict["Standard Deviation"] = stdv
1984  outdict["Confidence"] = confidence
1985  outdict["Frequency"] = self.cross_link_frequency[
1986  (r1, c1, r2, c2)]
1987  if c1 == c2:
1988  arrangement = "Intra"
1989  else:
1990  arrangement = "Inter"
1991  outdict["Arrangement"] = arrangement
1992  if self.external_csv_data is not None:
1993  outdict.update(self.external_csv_data[unique_identifier])
1994 
1995  dw.writerow(outdict)
1996 
1997  def plot(self, prot_listx=None, prot_listy=None, no_dist_info=False,
1998  no_confidence_info=False, filter=None, layout="whole",
1999  crosslinkedonly=False, filename=None, confidence_classes=None,
2000  alphablend=0.1, scale_symbol_size=1.0, gap_between_components=0,
2001  rename_protein_map=None):
2002  # layout can be:
2003  # "lowerdiagonal" print only the lower diagonal plot
2004  # "upperdiagonal" print only the upper diagonal plot
2005  # "whole" print all
2006  # crosslinkedonly: plot only components that have cross-links
2007  # no_dist_info: if True, plot the cross-links only as grey spots
2008  # filter is a tuple containing a keyword to be searched
2009  # in the database,
2010  # a relationship (">", "==" or "<"),
2011  # and a value,
2012  # for example ("ID_Score", ">", 40)
2013  # scale_symbol_size rescales the symbols drawn for the cross-links
2014  # rename_protein_map is a dictionary to rename proteins
2015 
2016  import matplotlib as mpl
2017  mpl.use('Agg')
2018  import matplotlib.pyplot as plt
2019  import matplotlib.cm as cm
2020 
2021  fig = plt.figure(figsize=(10, 10))
2022  ax = fig.add_subplot(111)
2023 
2024  ax.set_xticks([])
2025  ax.set_yticks([])
2026 
2027  # set the list of proteins on the x axis
2028  if prot_listx is None:
2029  if crosslinkedonly:
2030  prot_listx = list(self.crosslinkedprots)
2031  else:
2032  prot_listx = list(self.prot_length_dict.keys())
2033  prot_listx.sort()
2034 
2035  nresx = gap_between_components + \
2036  sum([self.prot_length_dict[name]
2037  + gap_between_components for name in prot_listx])
2038 
2039  # set the list of proteins on the y axis
2040 
2041  if prot_listy is None:
2042  if crosslinkedonly:
2043  prot_listy = list(self.crosslinkedprots)
2044  else:
2045  prot_listy = list(self.prot_length_dict.keys())
2046  prot_listy.sort()
2047 
2048  nresy = gap_between_components + \
2049  sum([self.prot_length_dict[name]
2050  + gap_between_components for name in prot_listy])
2051 
2052  # this is the residue offset for each protein
2053  resoffsetx = {}
2054  resendx = {}
2055  res = gap_between_components
2056  for prot in prot_listx:
2057  resoffsetx[prot] = res
2058  res += self.prot_length_dict[prot]
2059  resendx[prot] = res
2060  res += gap_between_components
2061 
2062  resoffsety = {}
2063  resendy = {}
2064  res = gap_between_components
2065  for prot in prot_listy:
2066  resoffsety[prot] = res
2067  res += self.prot_length_dict[prot]
2068  resendy[prot] = res
2069  res += gap_between_components
2070 
2071  resoffsetdiagonal = {}
2072  res = gap_between_components
2073  for prot in IMP.pmi.tools.OrderedSet(prot_listx + prot_listy):
2074  resoffsetdiagonal[prot] = res
2075  res += self.prot_length_dict[prot]
2076  res += gap_between_components
2077 
2078  # plot protein boundaries
2079 
2080  xticks = []
2081  xlabels = []
2082  for n, prot in enumerate(prot_listx):
2083  res = resoffsetx[prot]
2084  end = resendx[prot]
2085  for proty in prot_listy:
2086  resy = resoffsety[proty]
2087  endy = resendy[proty]
2088  ax.plot([res, res], [resy, endy], 'k-', lw=0.4)
2089  ax.plot([end, end], [resy, endy], 'k-', lw=0.4)
2090  xticks.append((float(res) + float(end)) / 2)
2091  if rename_protein_map is not None:
2092  if prot in rename_protein_map:
2093  prot = rename_protein_map[prot]
2094  xlabels.append(prot)
2095 
2096  yticks = []
2097  ylabels = []
2098  for n, prot in enumerate(prot_listy):
2099  res = resoffsety[prot]
2100  end = resendy[prot]
2101  for protx in prot_listx:
2102  resx = resoffsetx[protx]
2103  endx = resendx[protx]
2104  ax.plot([resx, endx], [res, res], 'k-', lw=0.4)
2105  ax.plot([resx, endx], [end, end], 'k-', lw=0.4)
2106  yticks.append((float(res) + float(end)) / 2)
2107  if rename_protein_map is not None:
2108  if prot in rename_protein_map:
2109  prot = rename_protein_map[prot]
2110  ylabels.append(prot)
2111 
2112  # plot the contact map
2113  print(prot_listx, prot_listy)
2114 
2115  if self.contactmap is not None:
2116  tmp_array = np.zeros((nresx, nresy))
2117 
2118  for px in prot_listx:
2119  print(px)
2120  for py in prot_listy:
2121  print(py)
2122  resx = resoffsetx[px]
2123  lengx = resendx[px] - 1
2124  resy = resoffsety[py]
2125  lengy = resendy[py] - 1
2126  indexes_x = self.index_dictionary[px]
2127  minx = min(indexes_x)
2128  maxx = max(indexes_x)
2129  indexes_y = self.index_dictionary[py]
2130  miny = min(indexes_y)
2131  maxy = max(indexes_y)
2132 
2133  print(px, py, minx, maxx, miny, maxy)
2134 
2135  try:
2136  tmp_array[
2137  resx:lengx,
2138  resy:lengy] = self.contactmap[
2139  minx:maxx,
2140  miny:maxy]
2141  except: # noqa: E722
2142  continue
2143 
2144  ax.imshow(tmp_array,
2145  cmap=cm.binary,
2146  origin='lower',
2147  interpolation='nearest')
2148 
2149  ax.set_xticks(xticks)
2150  ax.set_xticklabels(xlabels, rotation=90)
2151  ax.set_yticks(yticks)
2152  ax.set_yticklabels(ylabels)
2153  ax.set_xlim(0, nresx)
2154  ax.set_ylim(0, nresy)
2155 
2156  # set the cross-links
2157 
2158  already_added_xls = []
2159 
2160  for xl in self.crosslinks:
2161 
2162  (r1, c1, r2, c2, mdist, stdv, confidence,
2163  unique_identifier, descriptor) = xl
2164 
2165  if confidence_classes is not None:
2166  if confidence not in confidence_classes:
2167  continue
2168 
2169  try:
2170  pos1 = r1 + resoffsetx[c1]
2171  except: # noqa: E722
2172  continue
2173  try:
2174  pos2 = r2 + resoffsety[c2]
2175  except: # noqa: E722
2176  continue
2177 
2178  if filter is not None:
2179  xldb = self.external_csv_data[unique_identifier]
2180  xldb.update({"Distance": mdist})
2181  xldb.update({"Distance_stdv": stdv})
2182 
2183  if filter[1] == ">":
2184  if float(xldb[filter[0]]) <= float(filter[2]):
2185  continue
2186 
2187  if filter[1] == "<":
2188  if float(xldb[filter[0]]) >= float(filter[2]):
2189  continue
2190 
2191  if filter[1] == "==":
2192  if float(xldb[filter[0]]) != float(filter[2]):
2193  continue
2194 
2195  # the offsets below are used for plotting the diagonal
2196  # when the plot is rectangular
2197 
2198  pos_for_diagonal1 = r1 + resoffsetdiagonal[c1]
2199  pos_for_diagonal2 = r2 + resoffsetdiagonal[c2]
2200 
2201  if layout == 'lowerdiagonal':
2202  if pos_for_diagonal1 <= pos_for_diagonal2:
2203  continue
2204  if layout == 'upperdiagonal':
2205  if pos_for_diagonal1 >= pos_for_diagonal2:
2206  continue
2207 
2208  already_added_xls.append((r1, c1, r2, c2))
2209 
2210  if not no_confidence_info:
2211  if confidence == '0.01':
2212  markersize = 14 * scale_symbol_size
2213  elif confidence == '0.05':
2214  markersize = 9 * scale_symbol_size
2215  elif confidence == '0.1':
2216  markersize = 6 * scale_symbol_size
2217  else:
2218  markersize = 15 * scale_symbol_size
2219  else:
2220  markersize = 5 * scale_symbol_size
2221 
2222  if not no_dist_info:
2223  color = self.colormap(mdist)
2224  else:
2225  color = "gray"
2226 
2227  ax.plot(
2228  [pos1],
2229  [pos2],
2230  'o',
2231  c=color,
2232  alpha=alphablend,
2233  markersize=markersize)
2234 
2235  fig.set_size_inches(0.004 * nresx, 0.004 * nresy)
2236 
2237  for i in ax.spines.values():
2238  i.set_linewidth(2.0)
2239 
2240  if filename:
2241  plt.savefig(filename + ".pdf", dpi=300, transparent=False)
2242  else:
2243  plt.show()
2244 
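  # Illustrative end-to-end use of this class (not part of the original
  # module); file names, labels and thresholds are placeholder assumptions:
  #
  #   xlt = CrossLinkTable()
  #   xlt.set_hierarchy(prot)          # prot: root hierarchy of the system
  #   xlt.set_coordinates_for_contact_map("output/rmfs/0.rmf3", 0)
  #   xlt.set_crosslinks("output/stat.0.out",
  #                      search_label='ISDCrossLinkMS_Distance_')
  #   xlt.set_threshold(35.0)          # satisfaction threshold (Angstrom)
  #   xlt.set_tolerance(5.0)
  #   xlt.write_cross_link_database("xl_table.csv")
  #   xlt.plot(layout="whole", crosslinkedonly=True, filename="xl_map")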
2245  def get_frequency_statistics(self, prot_list,
2246  prot_list2=None):
2247 
2248  violated_histogram = {}
2249  satisfied_histogram = {}
2250  unique_cross_links = []
2251 
2252  for xl in self.unique_cross_link_list:
2253  (r1, c1, r2, c2, mdist) = xl
2254 
2255  # here we filter by the protein
2256  if prot_list2 is None:
2257  if c1 not in prot_list:
2258  continue
2259  if c2 not in prot_list:
2260  continue
2261  else:
2262  if c1 in prot_list and c2 in prot_list2:
2263  pass
2264  elif c1 in prot_list2 and c2 in prot_list:
2265  pass
2266  else:
2267  continue
2268 
2269  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2270 
2271  if (r1, c1, r2, c2) not in unique_cross_links:
2272  if mdist > 35.0:
2273  if frequency not in violated_histogram:
2274  violated_histogram[frequency] = 1
2275  else:
2276  violated_histogram[frequency] += 1
2277  else:
2278  if frequency not in satisfied_histogram:
2279  satisfied_histogram[frequency] = 1
2280  else:
2281  satisfied_histogram[frequency] += 1
2282  unique_cross_links.append((r1, c1, r2, c2))
2283  unique_cross_links.append((r2, c2, r1, c1))
2284 
2285  print("# satisfied")
2286 
2287  total_number_of_crosslinks = 0
2288 
2289  for i in satisfied_histogram:
2290  # if i in violated_histogram:
2291  # print i, satisfied_histogram[i]+violated_histogram[i]
2292  # else:
2293  if i in violated_histogram:
2294  print(i, violated_histogram[i]+satisfied_histogram[i])
2295  else:
2296  print(i, satisfied_histogram[i])
2297  total_number_of_crosslinks += i*satisfied_histogram[i]
2298 
2299  print("# violated")
2300 
2301  for i in violated_histogram:
2302  print(i, violated_histogram[i])
2303  total_number_of_crosslinks += i*violated_histogram[i]
2304 
2305  print(total_number_of_crosslinks)
2306 
2307  def print_cross_link_binary_symbols(self, prot_list,
2308  prot_list2=None):
2309  tmp_matrix = []
2310  confidence_list = []
2311  for xl in self.crosslinks:
2312  (r1, c1, r2, c2, mdist, stdv, confidence,
2313  unique_identifier, descriptor) = xl
2314 
2315  if prot_list2 is None:
2316  if c1 not in prot_list:
2317  continue
2318  if c2 not in prot_list:
2319  continue
2320  else:
2321  if c1 in prot_list and c2 in prot_list2:
2322  pass
2323  elif c1 in prot_list2 and c2 in prot_list:
2324  pass
2325  else:
2326  continue
2327 
2328  if descriptor != "original":
2329  continue
2330 
2331  confidence_list.append(confidence)
2332 
2333  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2334  tmp_dist_binary = []
2335  for d in dists:
2336  if d < 35:
2337  tmp_dist_binary.append(1)
2338  else:
2339  tmp_dist_binary.append(0)
2340  tmp_matrix.append(tmp_dist_binary)
2341 
2342  matrix = list(zip(*tmp_matrix))
2343 
2344  satisfied_high_sum = 0
2345  satisfied_mid_sum = 0
2346  satisfied_low_sum = 0
2347  total_satisfied_sum = 0
2348  for k, m in enumerate(matrix):
2349  satisfied_high = 0
2350  total_high = 0
2351  satisfied_mid = 0
2352  total_mid = 0
2353  satisfied_low = 0
2354  total_low = 0
2355  total_satisfied = 0
2356  total = 0
2357  for n, b in enumerate(m):
2358  if confidence_list[n] == "0.01":
2359  total_high += 1
2360  if b == 1:
2361  satisfied_high += 1
2362  satisfied_high_sum += 1
2363  elif confidence_list[n] == "0.05":
2364  total_mid += 1
2365  if b == 1:
2366  satisfied_mid += 1
2367  satisfied_mid_sum += 1
2368  elif confidence_list[n] == "0.1":
2369  total_low += 1
2370  if b == 1:
2371  satisfied_low += 1
2372  satisfied_low_sum += 1
2373  if b == 1:
2374  total_satisfied += 1
2375  total_satisfied_sum += 1
2376  total += 1
2377  print(k, satisfied_high, total_high)
2378  print(k, satisfied_mid, total_mid)
2379  print(k, satisfied_low, total_low)
2380  print(k, total_satisfied, total)
2381  print(float(satisfied_high_sum) / len(matrix))
2382  print(float(satisfied_mid_sum) / len(matrix))
2383  print(float(satisfied_low_sum) / len(matrix))
2384 # ------------
2385 
2386  def get_unique_crosslinks_statistics(self, prot_list,
2387  prot_list2=None):
2388 
2389  print(prot_list)
2390  print(prot_list2)
2391  satisfied_high = 0
2392  total_high = 0
2393  satisfied_mid = 0
2394  total_mid = 0
2395  satisfied_low = 0
2396  total_low = 0
2397  total = 0
2398  tmp_matrix = []
2399  satisfied_string = []
2400  for xl in self.crosslinks:
2401  (r1, c1, r2, c2, mdist, stdv, confidence,
2402  unique_identifier, descriptor) = xl
2403 
2404  if prot_list2 is None:
2405  if c1 not in prot_list:
2406  continue
2407  if c2 not in prot_list:
2408  continue
2409  else:
2410  if c1 in prot_list and c2 in prot_list2:
2411  pass
2412  elif c1 in prot_list2 and c2 in prot_list:
2413  pass
2414  else:
2415  continue
2416 
2417  if descriptor != "original":
2418  continue
2419 
2420  total += 1
2421  if confidence == "0.01":
2422  total_high += 1
2423  if mdist <= 35:
2424  satisfied_high += 1
2425  if confidence == "0.05":
2426  total_mid += 1
2427  if mdist <= 35:
2428  satisfied_mid += 1
2429  if confidence == "0.1":
2430  total_low += 1
2431  if mdist <= 35:
2432  satisfied_low += 1
2433  if mdist <= 35:
2434  satisfied_string.append(1)
2435  else:
2436  satisfied_string.append(0)
2437 
2438  dists = self.cross_link_distances_unique[(r1, c1, r2, c2)]
2439  tmp_dist_binary = []
2440  for d in dists:
2441  if d < 35:
2442  tmp_dist_binary.append(1)
2443  else:
2444  tmp_dist_binary.append(0)
2445  tmp_matrix.append(tmp_dist_binary)
2446 
2447  print("unique satisfied_high/total_high", satisfied_high, "/",
2448  total_high)
2449  print("unique satisfied_mid/total_mid", satisfied_mid, "/", total_mid)
2450  print("unique satisfied_low/total_low", satisfied_low, "/", total_low)
2451  print("total", total)
2452 
2453  matrix = list(zip(*tmp_matrix))
2454  satisfied_models = 0
2455  satstr = ""
2456  for b in satisfied_string:
2457  if b == 0:
2458  satstr += "-"
2459  if b == 1:
2460  satstr += "*"
2461 
2462  for m in matrix:
2463  all_satisfied = True
2464  string = ""
2465  for n, b in enumerate(m):
2466  if b == 0:
2467  string += "0"
2468  if b == 1:
2469  string += "1"
2470  if b == 1 and satisfied_string[n] == 1:
2471  pass
2472  elif b == 1 and satisfied_string[n] == 0:
2473  pass
2474  elif b == 0 and satisfied_string[n] == 0:
2475  pass
2476  elif b == 0 and satisfied_string[n] == 1:
2477  all_satisfied = False
2478  if all_satisfied:
2479  satisfied_models += 1
2480  print(string)
2481  print(satstr, all_satisfied)
2482  print("models that satisfies the median satisfied cross-links/total "
2483  "models", satisfied_models, len(matrix))
2484 
2485  def plot_matrix_cross_link_distances_unique(self, figurename, prot_list,
2486  prot_list2=None):
2487 
2488  import matplotlib as mpl
2489  mpl.use('Agg')
2490  import matplotlib.pylab as pl
2491 
2492  tmp_matrix = []
2493  for kw in self.cross_link_distances_unique:
2494  (r1, c1, r2, c2) = kw
2495  dists = self.cross_link_distances_unique[kw]
2496 
2497  if prot_list2 is None:
2498  if c1 not in prot_list:
2499  continue
2500  if c2 not in prot_list:
2501  continue
2502  else:
2503  if c1 in prot_list and c2 in prot_list2:
2504  pass
2505  elif c1 in prot_list2 and c2 in prot_list:
2506  pass
2507  else:
2508  continue
2509  # append the sum of dists to order by that in the matrix plot
2510  dists.append(sum(dists))
2511  tmp_matrix.append(dists)
2512 
2513  tmp_matrix.sort(key=itemgetter(len(tmp_matrix[0]) - 1))
2514 
2515  # print len(tmp_matrix), len(tmp_matrix[0])-1
2516  matrix = np.zeros((len(tmp_matrix), len(tmp_matrix[0]) - 1))
2517 
2518  for i in range(len(tmp_matrix)):
2519  for k in range(len(tmp_matrix[i]) - 1):
2520  matrix[i][k] = tmp_matrix[i][k]
2521 
2522  print(matrix)
2523 
2524  fig = pl.figure()
2525  ax = fig.add_subplot(211)
2526 
2527  cax = ax.imshow(matrix, interpolation='nearest')
2528  fig.colorbar(cax)
2529  pl.savefig(figurename, dpi=300)
2530  pl.show()
2531 
2532  def plot_bars(
2533  self,
2534  filename,
2535  prots1,
2536  prots2,
2537  nxl_per_row=20,
2538  arrangement="inter",
2539  confidence_input="None"):
2540 
2541  data = []
2542  for xl in self.cross_link_distances:
2543  (r1, c1, r2, c2, mdist, confidence) = xl
2544  if c1 in prots1 and c2 in prots2:
2545  if arrangement == "inter" and c1 == c2:
2546  continue
2547  if arrangement == "intra" and c1 != c2:
2548  continue
2549  if confidence_input == confidence:
2550  label = str(c1) + ":" + str(r1) + \
2551  "-" + str(c2) + ":" + str(r2)
2552  values = self.cross_link_distances[xl]
2553  frequency = self.cross_link_frequency[(r1, c1, r2, c2)]
2554  data.append((label, values, mdist, frequency))
2555 
2556  sort_by_dist = sorted(data, key=lambda tup: tup[2])
2557  sort_by_dist = list(zip(*sort_by_dist))
2558  values = sort_by_dist[1]
2559  positions = list(range(len(values)))
2560  labels = sort_by_dist[0]
2561  frequencies = list(map(float, sort_by_dist[3]))
2562  frequencies = [f * 10.0 for f in frequencies]
2563 
2564  nchunks = int(float(len(values)) / nxl_per_row)
2565  values_chunks = IMP.pmi.tools.chunk_list_into_segments(values, nchunks)
2566  positions_chunks = IMP.pmi.tools.chunk_list_into_segments(
2567  positions,
2568  nchunks)
2569  frequencies_chunks = IMP.pmi.tools.chunk_list_into_segments(
2570  frequencies,
2571  nchunks)
2572  labels_chunks = IMP.pmi.tools.chunk_list_into_segments(labels, nchunks)
2573 
2574  for n, v in enumerate(values_chunks):
2575  p = positions_chunks[n]
2576  f = frequencies_chunks[n]
2578  filename + "." + str(n), v, p, f,
2579  valuename="Distance (Ang)",
2580  positionname="Unique " + arrangement + " Crosslinks",
2581  xlabels=labels_chunks[n])
2582 
2583  def crosslink_distance_histogram(self, filename,
2584  prot_list=None,
2585  prot_list2=None,
2586  confidence_classes=None,
2587  bins=40,
2588  color='#66CCCC',
2589  yplotrange=[0, 1],
2590  format="png",
2591  normalized=False):
2592  if prot_list is None:
2593  prot_list = list(self.prot_length_dict.keys())
2594 
2595  distances = []
2596  for xl in self.crosslinks:
2597  (r1, c1, r2, c2, mdist, stdv, confidence,
2598  unique_identifier, descriptor) = xl
2599 
2600  if confidence_classes is not None:
2601  if confidence not in confidence_classes:
2602  continue
2603 
2604  if prot_list2 is None:
2605  if c1 not in prot_list:
2606  continue
2607  if c2 not in prot_list:
2608  continue
2609  else:
2610  if c1 in prot_list and c2 in prot_list2:
2611  pass
2612  elif c1 in prot_list2 and c2 in prot_list:
2613  pass
2614  else:
2615  continue
2616 
2617  distances.append(mdist)
2618 
2619  IMP.pmi.output.plot_field_histogram(
2620  filename, distances, valuename="C-alpha C-alpha distance [Ang]",
2621  bins=bins, color=color,
2622  format=format,
2623  reference_xline=35.0,
2624  yplotrange=yplotrange, normalized=normalized)
2625 
2626  def scatter_plot_xl_features(self, filename,
2627  feature1=None,
2628  feature2=None,
2629  prot_list=None,
2630  prot_list2=None,
2631  yplotrange=None,
2632  reference_ylines=None,
2633  distance_color=True,
2634  format="png"):
2635  import matplotlib as mpl
2636  mpl.use('Agg')
2637  import matplotlib.pyplot as plt
2638 
2639  fig = plt.figure(figsize=(10, 10))
2640  ax = fig.add_subplot(111)
2641 
2642  for xl in self.crosslinks:
2643  (r1, c1, r2, c2, mdist, stdv, confidence,
2644  unique_identifier, arrangement) = xl
2645 
2646  if prot_list2 is None:
2647  if c1 not in prot_list:
2648  continue
2649  if c2 not in prot_list:
2650  continue
2651  else:
2652  if c1 in prot_list and c2 in prot_list2:
2653  pass
2654  elif c1 in prot_list2 and c2 in prot_list:
2655  pass
2656  else:
2657  continue
2658 
2659  xldb = self.external_csv_data[unique_identifier]
2660  xldb.update({"Distance": mdist})
2661  xldb.update({"Distance_stdv": stdv})
2662 
2663  xvalue = float(xldb[feature1])
2664  yvalue = float(xldb[feature2])
2665 
2666  if distance_color:
2667  color = self.colormap(mdist)
2668  else:
2669  color = "gray"
2670 
2671  ax.plot([xvalue], [yvalue], 'o', c=color, alpha=0.1, markersize=7)
2672 
2673  if yplotrange is not None:
2674  ax.set_ylim(yplotrange)
2675  if reference_ylines is not None:
2676  for rl in reference_ylines:
2677  ax.axhline(rl, color='red', linestyle='dashed', linewidth=1)
2678 
2679  if filename:
2680  plt.savefig(filename + "." + format, dpi=150, transparent=False)
2681 
2682  plt.show()
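  # Illustrative follow-up calls continuing the sketch above (not part of
  # the original module); protein and file names are placeholders:
  #
  #   xlt.crosslink_distance_histogram("xl_histogram",
  #                                    prot_list=["ProtA", "ProtB"],
  #                                    bins=40, normalized=True)
  #   xlt.get_frequency_statistics(["ProtA", "ProtB"])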