IMP logo
IMP Reference Guide  2.16.0
The Integrative Modeling Platform
rmsd_calculation.py
1 from __future__ import print_function
2 import pyRMSD.RMSDCalculator
3 from pyRMSD.condensedMatrix import CondensedMatrix
4 
5 import numpy as np
6 import os
7 import glob
8 
9 import IMP
10 import IMP.atom
11 import IMP.rmf
12 import RMF
13 
14 
15 def parse_rmsd_selection(h, selection, resolution=1):
16  s0 = None
17  for domain_list in selection.values():
18  # each element of the dictionary is a list of domains
19  # each domain is a tuple like (start,end,protein) or (protein)
20  # to add copy number use (start,end,protein.copy_number)
21  # or (protein.copy_number)
22 
23  for domain in domain_list:
24 
25  start_res = int(domain[0])
26  end_res = int(domain[1])
27 
28  prot_plus_copy = domain[2]
29 
30  if "." in prot_plus_copy:
31  copy_number = int(prot_plus_copy.split(".")[1])
32  prot_name = prot_plus_copy.split(".")[0]
33 
34  else:
35  copy_number = 0
36  prot_name = prot_plus_copy
37 
38  s = IMP.atom.Selection(h, resolution=resolution,
39  molecule=prot_name,
40  copy_index=copy_number,
41  residue_indexes=range(start_res, end_res+1))
42 
43  if s0:
44  s0 |= s
45  else:
46  s0 = s
47  return s0
48 
49 
50 def get_pdbs_coordinates(path, idfile_A, idfile_B):
51  pts = []
52  conform = []
53  num = 0
54  masses = []
55  radii = []
56 
57  models_name = []
58 
59  with open(idfile_A, 'w+') as f1:
60  for str_file in sorted(
61  glob.glob("%s/sample_A/*.pdb" % path),
62  key=lambda x: int(x.split('/')[-1].split('.')[0])):
63  print(str_file, num, file=f1)
64  models_name.append(str_file)
65 
66  m = IMP.Model()
67  mh = IMP.atom.read_pdb(str_file, m,
69  mps = IMP.core.get_leaves(mh)
70  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
71  if num == 0:
72  masses = [IMP.atom.Mass(p).get_mass() for p in mps]
73  radii = [IMP.core.XYZR(p).get_radius() for p in mps]
74  conform.append(pts)
75  pts = []
76  num = num + 1
77 
78  with open(idfile_B, 'w+') as f2:
79  for str_file in sorted(
80  glob.glob("%s/sample_B/*.pdb" % path),
81  key=lambda x: int(x.split('/')[-1].split('.')[0])):
82  print(str_file, num, file=f2)
83  models_name.append(str_file)
84 
85  m = IMP.Model()
86  mh = IMP.atom.read_pdb(str_file, m,
88  mps = IMP.core.get_leaves(mh)
89  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
90  conform.append(pts)
91  pts = []
92  num = num + 1
93 
94  return np.array(conform), masses, radii, models_name
95 
96 
97 def get_rmfs_coordinates(path, idfile_A, idfile_B,
98  subunit_name=None, selection=None, resolution=1):
99 
100  conform = []
101  num = 0
102  masses = []
103  radii = []
104  ps_names = []
105 
106  f1 = open(idfile_A, 'w+')
107  f2 = open(idfile_B, 'w+')
108 
109  models_name = []
110 
111  for sample_name, sample_id_file in zip(['A', 'B'], [f1, f2]):
112 
113  for str_file in sorted(
114  glob.glob("%s/sample_%s/*.rmf3" % (path, sample_name)),
115  key=lambda x: int(x.split('/')[-1].split('.')[0])):
116  print(str_file, num, file=sample_id_file)
117  models_name.append(str_file)
118 
119  m = IMP.Model()
120  inf = RMF.open_rmf_file_read_only(str_file)
121  h = IMP.rmf.create_hierarchies(inf, m)[0]
122  IMP.rmf.load_frame(inf, 0)
123 
124  pts = []
125 
126  if subunit_name:
127  s0 = IMP.atom.Selection(h, resolution=resolution,
128  molecule=subunit_name)
129  elif selection is not None:
130  s0 = parse_rmsd_selection(h, selection, resolution)
131  else:
132  s0 = IMP.atom.Selection(h, resolution=resolution)
133 
134  for leaf in s0.get_selected_particles():
135 
136  p = IMP.core.XYZR(leaf)
137  pts.append([p.get_coordinates()[i] for i in range(3)])
138 
139  if num == 0 and sample_name == 'A':
140  masses.append(IMP.atom.Mass(leaf).get_mass())
141  radii.append(p.get_radius())
142  mol_name = \
144  # traverse up the Hierarchy to get copy number of the
145  # molecule that the bead belongs to
146  copy_number = \
148 
150  # TODO not tested on non-fragment systems
151  residues_in_bead = \
152  IMP.atom.Fragment(leaf).get_residue_indexes()
153 
154  ps_names.append(
155  mol_name + "_" + str(min(residues_in_bead)) + "_"
156  + str(max(residues_in_bead)) + "_"
157  + str(copy_number))
158 
159  else:
160  residue_in_bead = \
161  str(IMP.atom.Residue(leaf).get_index())
162 
163  ps_names.append(mol_name + "_" + residue_in_bead + "_"
164  + residue_in_bead + "_"
165  + str(copy_number))
166 
167  conform.append(pts)
168  pts = []
169  num = num + 1
170  f1.close()
171  f2.close()
172 
173  return ps_names, masses, radii, np.array(conform), models_name
174 
175 
176 def parse_symmetric_groups_file(symm_groups_file):
177  symm_groups = []
178  member_to_symm_group = {}
179  first_group_member = []
180  curr_particle_index_in_group = []
181 
182  sgf = open(symm_groups_file, 'r')
183 
184  for indx, ln in enumerate(sgf.readlines()):
185 
186  symm_groups.append([]) # create new symm group list
187  # particle index for new symmetric group
188  curr_particle_index_in_group.append(-1)
189  fields = ln.strip().split()
190 
191  for fld in fields:
192  # group that the current protein copy belongs to
193  member_to_symm_group[fld] = indx
194 
195  # the first group member is special! We create a symm group list
196  # of particles for the first group member
197  first_group_member.append(fields[0])
198 
199  sgf.close()
200 
201  return (symm_groups, member_to_symm_group, curr_particle_index_in_group,
202  first_group_member)
203 
204 
205 def get_rmfs_coordinates_one_rmf(path, rmf_A, rmf_B, subunit_name=None,
206  symm_groups_file=None, selection=None,
207  resolution=1):
208  '''Modified RMF coordinates function to work with symmetric copies'''
209 
210  # Open RMFs and get total number of models
211  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
212  n_models = [rmf_fh.get_number_of_frames()]
213  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_B))
214  n_models.append(rmf_fh.get_number_of_frames())
215 
216  masses = []
217  radii = []
218  ps_names = []
219 
220  models_name = []
221 
222  # Build hierarchy from the RMF file
223  m = IMP.Model()
224  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
225  IMP.rmf.load_frame(rmf_fh, 0)
226  m.update()
227 
228  ######
229  # Initialize output array
230  pts = 0 # number of particles in each model
231 
232  # Get selection
233  if subunit_name:
234  s0 = IMP.atom.Selection(h, resolution=resolution,
235  molecule=subunit_name)
236  elif selection is not None:
237  s0 = parse_rmsd_selection(h, selection, resolution)
238  else:
239  s0 = IMP.atom.Selection(h, resolution=resolution)
240 
241  # Count particles
242  for leaf in s0.get_selected_particles():
243  p = IMP.core.XYZR(leaf)
244  pts += 1
245 
246  # Initialize array
247  conform = np.empty([n_models[0]+n_models[1], pts, 3])
248 
249  # Initialize the symmetric group particles list, and protein to
250  # symmetry group mapping
251  if symm_groups_file:
252  (symm_groups, group_member_to_symm_group_map,
253  curr_particle_index_in_group, first_group_member) = \
254  parse_symmetric_groups_file(symm_groups_file)
255  else:
256  symm_groups = None
257 
258  mod_id = 0 # index for each model in conform.
259 
260  for rmf_file in [rmf_A, rmf_B]:
261 
262  models_name.append(rmf_file)
263 
264  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
265  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
266 
267  print("Opening RMF file:", rmf_file, "with",
268  rmf_fh.get_number_of_frames(), "frames")
269 
270  for f in range(rmf_fh.get_number_of_frames()):
271 
272  if f % 100 == 0:
273  # pass
274  print(" -- Opening frame", f, "of",
275  rmf_fh.get_number_of_frames())
276 
277  IMP.rmf.load_frame(rmf_fh, f)
278 
279  m.update()
280 
281  # Store particle indices and loop over individual protein
282  # names for symmetric copies
283  if subunit_name:
284  s0 = IMP.atom.Selection(h, resolution=resolution,
285  molecule=subunit_name)
286  elif selection is not None:
287  s0 = parse_rmsd_selection(h, selection, resolution)
288  else:
289  s0 = IMP.atom.Selection(h, resolution=resolution)
290 
291  particles = s0.get_selected_particles()
292 
293  # Copy particle coordinates
294  for i in range(len(particles)):
295  # i is an index over all particles in the system
296 
297  leaf = particles[i]
298  p = IMP.core.XYZR(leaf)
299  pxyz = p.get_coordinates()
300  conform[mod_id][i][0] = pxyz[0]
301  conform[mod_id][i][1] = pxyz[1]
302  conform[mod_id][i][2] = pxyz[2]
303 
304  # Just for the first model, update the masses and radii
305  # and log the particle name in ps_names
306  if mod_id == 0 and rmf_file == rmf_A:
307  masses.append(IMP.atom.Mass(leaf).get_mass())
308  radii.append(p.get_radius())
309  mol_name = IMP.atom.get_molecule_name(
310  IMP.atom.Hierarchy(leaf))
311  # traverse up the Hierarchy to get copy number of
312  # the molecule that the bead belongs to
313  copy_number = \
315 
316  # Add to symmetric groups if needed
317  if symm_groups_file:
318 
319  protein_plus_copy = mol_name+'.'+str(copy_number)
320  if protein_plus_copy in group_member_to_symm_group_map:
321  # protein copy is in a symmetric group
322  group_index = group_member_to_symm_group_map[
323  protein_plus_copy]
324  curr_particle_index_in_group[group_index] += 1
325 
326  if protein_plus_copy \
327  == first_group_member[group_index]:
328  symm_groups[group_index].append([i])
329 
330  else:
331  j = curr_particle_index_in_group[group_index] \
332  % len(symm_groups[group_index])
333  symm_groups[group_index][j].append(i)
334 
335  # TODO not tested on non-fragment systems
337  residues_in_bead = \
338  IMP.atom.Fragment(leaf).get_residue_indexes()
339  ps_names.append(
340  mol_name + "_" + str(min(residues_in_bead)) + "_"
341  + str(max(residues_in_bead)) + "_"
342  + str(copy_number))
343  else:
344  residue_in_bead = \
345  str(IMP.atom.Residue(leaf).get_index())
346  ps_names.append(
347  mol_name + "_" + residue_in_bead + "_" +
348  residue_in_bead + "_" + str(copy_number))
349  mod_id += 1
350 
351  if symm_groups_file:
352  for grp in symm_groups:
353  if len(grp) == 0:
354  print("Warning. Symmetry option specified but created "
355  "symmetry group is empty. Cross-check the "
356  "specification of symmetry groups.")
357 
358  return ps_names, masses, radii, conform, symm_groups, models_name, n_models
359 
360 
361 def get_rmsds_matrix(conforms, mode, sup, cores, symm_groups=None):
362 
363  if (mode == "cpu_serial" and not sup) or (mode == "cpu_omp" and not sup):
364  calculator_name = "NOSUP_OMP_CALCULATOR"
365 
366  elif mode == "cpu_omp" and sup:
367  calculator_name = "QCP_OMP_CALCULATOR"
368  print("we are using QCP_OMP to compute RMSD")
369  elif mode == "cuda" and sup:
370  calculator_name = "QCP_CUDA_MEM_CALCULATOR"
371  else:
372  print("Wrong values to pyRMSD ! Please Fix")
373  exit()
374 
375  if symm_groups:
376  print("We have ambiguity.")
377  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
378  calculator_name,
379  fittingCoordsets=conforms,
380  calcSymmetryGroups=symm_groups,
381  fitSymmetryGroups=symm_groups)
382 
383  else:
384  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
385  calculator_name, conforms)
386 
387  # additionally set number of cores for parallel calculator
388  if mode == "cpu_omp":
389  calculator.setNumberOfOpenMPThreads(int(cores))
390 
391  rmsd = calculator.pairwiseRMSDMatrix()
392  rmsd_matrix = CondensedMatrix(rmsd)
393  inner_data = rmsd_matrix.get_data()
394  np.save("Distances_Matrix.data", inner_data)
395 
396  return inner_data
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Add mass to a particle.
Definition: Mass.h:23
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:73
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
Definition: Residue.h:137
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27