IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
rmsd_calculation.py
1 from __future__ import print_function
2 import pyRMSD.RMSDCalculator
3 from pyRMSD.matrixHandler import MatrixHandler
4 from pyRMSD.condensedMatrix import CondensedMatrix
5 
6 import numpy as np
7 import sys, os, glob
8 
9 import IMP
10 import IMP.atom
11 import IMP.rmf
12 import RMF
13 
14 def get_pdbs_coordinates(path, idfile_A, idfile_B):
15 
16  pts = []
17  conform = []
18  num = 0
19  masses = []
20  radii = []
21 
22  models_name = []
23 
24  with open(idfile_A, 'w+') as f1:
25  for str_file in sorted(glob.glob("%s/sample_A/*.pdb" % path),
26  key=lambda x:int(x.split('/')[-1].split('.')[0])):
27  print(str_file, num, file=f1)
28  models_name.append(str_file)
29 
30  m = IMP.Model()
31  mh = IMP.atom.read_pdb(str_file, m,
33  mps = IMP.core.get_leaves(mh)
34  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
35  if num == 0:
36  masses = [IMP.atom.Mass(p).get_mass() for p in mps]
37  radii = [IMP.core.XYZR(p).get_radius() for p in mps]
38  conform.append(pts)
39  pts = []
40  num = num + 1
41 
42  with open(idfile_B, 'w+') as f2:
43  for str_file in sorted(glob.glob("%s/sample_B/*.pdb" % path),
44  key=lambda x:int(x.split('/')[-1].split('.')[0])):
45  print(str_file, num, file=f2)
46  models_name.append(str_file)
47 
48  m = IMP.Model()
49  mh = IMP.atom.read_pdb(str_file, m,
51  mps = IMP.core.get_leaves(mh)
52  pts = [IMP.core.XYZ(p).get_coordinates() for p in mps]
53  conform.append(pts)
54  pts = []
55  num = num + 1
56 
57  return np.array(conform), masses, radii, models_name
58 
59 def get_rmfs_coordinates(path, idfile_A, idfile_B, subunit_name):
60 
61  conform = []
62  num = 0
63  masses = []
64  radii = []
65  ps_names = []
66 
67  f1=open(idfile_A, 'w+')
68  f2=open(idfile_B, 'w+')
69 
70  models_name = []
71 
72  for sample_name,sample_id_file in zip(['A','B'],[f1,f2]):
73 
74  for str_file in sorted(glob.glob("%s/sample_%s/*.rmf3" % (path,sample_name)),key=lambda x:int(x.split('/')[-1].split('.')[0])):
75  print(str_file, num, file=sample_id_file)
76  models_name.append(str_file)
77 
78  m = IMP.Model()
79  inf = RMF.open_rmf_file_read_only(str_file)
80  h = IMP.rmf.create_hierarchies(inf, m)[0]
81  IMP.rmf.load_frame(inf, 0)
82 
83  pts = []
84 
85  if subunit_name:
86  s0 = IMP.atom.Selection(h, resolution=1,molecule=subunit_name)
87  else:
88  s0 = IMP.atom.Selection(h, resolution=1)
89 
90  for leaf in s0.get_selected_particles():
91 
92  p=IMP.core.XYZR(leaf)
93  pts.append([p.get_coordinates()[i] for i in range(3)])
94 
95  if num == 0 and sample_name=='A':
96  masses.append(IMP.atom.Mass(leaf).get_mass())
97  radii.append(p.get_radius())
99  # traverse up the Hierarchy to get copy number of the molecule that the bead belongs to
101 
102  if IMP.atom.Fragment.get_is_setup(leaf): #TODO not tested on non-fragment systems
103  residues_in_bead = IMP.atom.Fragment(leaf).get_residue_indexes()
104 
105  ps_names.append(mol_name+"_"+str(min(residues_in_bead))+"_"+str(max(residues_in_bead))+"_"+str(copy_number))
106 
107  else:
108  residue_in_bead = str(IMP.atom.Residue(leaf).get_index())
109 
110  ps_names.append(mol_name+"_"+residue_in_bead+"_"+residue_in_bead+"_"+str(copy_number))
111 
112  conform.append(pts)
113  pts = []
114  num = num + 1
115  f1.close()
116  f2.close()
117 
118  return ps_names, masses, radii, np.array(conform), models_name
119 
120 def parse_symmetric_groups_file(symm_groups_file):
121 
122  symm_groups =[]
123  member_to_symm_group={}
124  first_group_member=[]
125  curr_particle_index_in_group = []
126 
127  sgf = open(symm_groups_file,'r')
128 
129  for indx,ln in enumerate(sgf.readlines()):
130 
131  symm_groups.append([]) # create new symm group list
132  curr_particle_index_in_group.append(-1) # particle index for new symmetric group
133 
134  fields = ln.strip().split()
135 
136  for fld in fields:
137  member_to_symm_group[fld] = indx # group that the current protein copy belongs to
138 
139  first_group_member.append(fields[0]) # the first group member is special! We create a symm group list of particles for the first group member
140 
141  sgf.close()
142 
143  return (symm_groups, member_to_symm_group, curr_particle_index_in_group, first_group_member)
144 
145 def get_rmfs_coordinates_one_rmf(path, rmf_A, rmf_B, subunit_name=None, symm_groups_file=None):
146 
147  '''Modified RMF coordinates function to work with symmetric copies'''
148 
149  # Open RMFs and get total number of models
150  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path,rmf_A))
151  n_models = [rmf_fh.get_number_of_frames()]
152  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path,rmf_B))
153  n_models.append(rmf_fh.get_number_of_frames())
154 
155  masses = []
156  radii = []
157  ps_names = []
158 
159  models_name = []
160 
161  # Build hierarchy from the RMF file
162  m = IMP.Model()
163  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
164  IMP.rmf.load_frame(rmf_fh, 0)
165  m.update()
166 
167  ######
168  # Initialize output array
169  pts = 0 # number of particles in each model
170 
171  # Get selection
172  if subunit_name:
173  s0 = IMP.atom.Selection(h, resolution=1, molecule=subunit_name)
174  else:
175  s0 = IMP.atom.Selection(h, resolution=1)
176 
177  # Count particles
178  for leaf in s0.get_selected_particles():
179  p=IMP.core.XYZR(leaf)
180  pts+=1
181 
182  # Initialize array
183  conform = np.empty([n_models[0]+n_models[1], pts, 3])
184 
185  # Initialize the symmetric group particles list, and protein to symmetry group mapping
186  if symm_groups_file:
187  (symm_groups, group_member_to_symm_group_map, curr_particle_index_in_group, first_group_member)=parse_symmetric_groups_file(symm_groups_file)
188  else: symm_groups = None
189 
190  mod_id = 0 # index for each model in conform.
191 
192  for rmf_file in [rmf_A, rmf_B]:
193 
194  models_name.append(rmf_file)
195 
196  rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path,rmf_file))
197  h = IMP.rmf.create_hierarchies(rmf_fh, m)[0]
198 
199  print("Opening RMF file:", rmf_file, "with", rmf_fh.get_number_of_frames(), "frames")
200 
201  for f in range(rmf_fh.get_number_of_frames()):
202 
203  if f%100==0:
204  #pass
205  print(" -- Opening frame", f, "of", rmf_fh.get_number_of_frames())
206 
207  IMP.rmf.load_frame(rmf_fh, f)
208 
209  m.update()
210 
211  # Store particle indices and loop over individual protein names for symmetric copies
212  if subunit_name:
213  s0 = IMP.atom.Selection(h, resolution=1, molecule=subunit_name)
214  else:
215  s0 = IMP.atom.Selection(h, resolution=1)
216 
217  particles = s0.get_selected_particles()
218 
219 
220  # Copy particle coordinates
221  for i in range(len(particles)): # i is an index over all particles in the system
222 
223  leaf=particles[i]
224  p=IMP.core.XYZR(leaf)
225  pxyz = p.get_coordinates()
226  conform[mod_id][i][0] = pxyz[0]
227  conform[mod_id][i][1] = pxyz[1]
228  conform[mod_id][i][2] = pxyz[2]
229 
230  # Just for the first model, update the masses and radii and log the particle name in ps_names
231  if mod_id == 0 and rmf_file==rmf_A:
232  masses.append(IMP.atom.Mass(leaf).get_mass())
233  radii.append(p.get_radius())
235  # traverse up the Hierarchy to get copy number of the molecule that the bead belongs to
237 
238  # Add to symmetric groups if needed
239  if symm_groups_file:
240 
241  protein_plus_copy = mol_name+'.'+str(copy_number)
242 
243  if protein_plus_copy in group_member_to_symm_group_map:
244  # protein copy is in a symmetric group
245 
246  group_index=group_member_to_symm_group_map[protein_plus_copy]
247 
248  curr_particle_index_in_group[group_index] +=1
249 
250  if protein_plus_copy == first_group_member[group_index]:
251  symm_groups[group_index].append([i])
252 
253  else:
254  j = curr_particle_index_in_group[group_index] % len(symm_groups[group_index])
255  symm_groups[group_index][j].append(i)
256 
257  if IMP.atom.Fragment.get_is_setup(leaf): #TODO not tested on non-fragment systems
258  residues_in_bead = IMP.atom.Fragment(leaf).get_residue_indexes()
259  ps_names.append(mol_name+"_"+str(min(residues_in_bead))+"_"+str(max(residues_in_bead))+"_"+str(copy_number))
260  else:
261  residue_in_bead = str(IMP.atom.Residue(leaf).get_index())
262  ps_names.append(mol_name+"_"+residue_in_bead+"_"+residue_in_bead+"_"+str(copy_number))
263  mod_id+=1
264 
265  return ps_names, masses, radii, conform, symm_groups, models_name, n_models
266 
267 def get_rmsds_matrix(conforms, mode, sup, cores,symm_groups=None):
268  print("Mode:",mode,"Superposition:",sup,"Number of cores:",cores)
269 
270  if(mode=="cpu_serial" and not sup) or (mode=="cpu_omp" and not sup):
271  calculator_name = "NOSUP_OMP_CALCULATOR"
272 
273  elif(mode=="cpu_omp" and sup):
274  calculator_name = "QCP_OMP_CALCULATOR"
275 
276  elif(mode=="cuda" and sup):
277  calculator_name = "QCP_CUDA_MEM_CALCULATOR"
278  else:
279  print("Wrong values to pyRMSD ! Please Fix")
280  exit()
281 
282  if symm_groups:
283  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(calculator_name, fittingCoordsets=conforms,calculationCoordsets=conforms,calcSymmetryGroups=symm_groups)
284  else:
285  calculator = pyRMSD.RMSDCalculator.RMSDCalculator(calculator_name, conforms)
286 
287  # additionally set number of cores for parallel calculator
288  if(mode=="cpu_omp"):
289  calculator.setNumberOfOpenMPThreads(int(cores))
290 
291  rmsd = calculator.pairwiseRMSDMatrix()
292  rmsd_matrix=CondensedMatrix(rmsd)
293  inner_data = rmsd_matrix.get_data()
294  np.save("Distances_Matrix.data", inner_data)
295 
296  return inner_data
Select non water and non hydrogen atoms.
Definition: pdb.h:245
Add mass to a particle.
Definition: Mass.h:23
A decorator to associate a particle with a part of a protein/DNA/RNA.
Definition: Fragment.h:20
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:72
static bool get_is_setup(Model *m, ParticleIndex pi)
Definition: Fragment.h:46
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
Definition: Residue.h:135
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Definition: Selection.h:66
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.
Definition: XYZR.h:27