1 """@namespace IMP.sampcon.rmsd_calculation
2 Utilities to help with RMSD calculation."""
4 from __future__
import print_function
5 import pyRMSD.RMSDCalculator
6 from pyRMSD.condensedMatrix
import CondensedMatrix
16 import multiprocessing
as mp
18 from itertools
import combinations
21 def parse_symm_groups_for_pyrmsd(s):
28 for j, k
in combinations(np.arange(n), 2):
29 sub_grp = np.array(grp)[:, np.array([j, k])]
30 output.append(sub_grp.tolist())
34 def parse_rmsd_selection(h, selection, resolution=1):
36 for domain_list
in selection.values():
42 for domain
in domain_list:
44 start_res = int(domain[0])
45 end_res = int(domain[1])
47 prot_plus_copy = domain[2]
49 if "." in prot_plus_copy:
50 copy_number = int(prot_plus_copy.split(
".")[1])
51 prot_name = prot_plus_copy.split(
".")[0]
55 prot_name = prot_plus_copy
59 copy_index=copy_number,
60 residue_indexes=range(start_res, end_res+1))
69 def get_pdbs_coordinates(path, idfile_A, idfile_B):
78 with open(idfile_A,
'w+')
as f1:
79 for str_file
in sorted(
80 glob.glob(
"%s/sample_A/*.pdb" % path),
81 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
82 print(str_file, num, file=f1)
83 models_name.append(str_file)
97 with open(idfile_B,
'w+')
as f2:
98 for str_file
in sorted(
99 glob.glob(
"%s/sample_B/*.pdb" % path),
100 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
101 print(str_file, num, file=f2)
102 models_name.append(str_file)
113 return np.array(conform), masses, radii, models_name
116 def get_rmfs_coordinates(path, idfile_A, idfile_B,
117 subunit_name=
None, selection=
None, resolution=1):
125 f1 = open(idfile_A,
'w+')
126 f2 = open(idfile_B,
'w+')
130 for sample_name, sample_id_file
in zip([
'A',
'B'], [f1, f2]):
132 for str_file
in sorted(
133 glob.glob(
"%s/sample_%s/*.rmf3" % (path, sample_name)),
134 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
135 print(str_file, num, file=sample_id_file)
136 models_name.append(str_file)
139 inf = RMF.open_rmf_file_read_only(str_file)
147 molecule=subunit_name)
148 elif selection
is not None:
149 s0 = parse_rmsd_selection(h, selection, resolution)
153 for leaf
in s0.get_selected_particles():
156 pts.append([p.get_coordinates()[i]
for i
in range(3)])
158 if num == 0
and sample_name ==
'A':
160 radii.append(p.get_radius())
174 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
175 + str(max(residues_in_bead)) +
"_"
182 ps_names.append(mol_name +
"_" + residue_in_bead +
"_"
183 + residue_in_bead +
"_"
192 return ps_names, masses, radii, np.array(conform), models_name
195 def parse_symmetric_groups_file(symm_groups_file):
197 member_to_symm_group = {}
198 first_group_member = []
199 curr_particle_index_in_group = []
201 sgf = open(symm_groups_file,
'r')
203 for indx, ln
in enumerate(sgf.readlines()):
205 symm_groups.append([])
207 curr_particle_index_in_group.append(-1)
208 fields = ln.strip().split()
212 for subfld
in fld.split(
'/'):
213 member_to_symm_group[subfld] = indx
217 first_group_member.append(fields[0].split(
'/'))
221 return (symm_groups, member_to_symm_group, curr_particle_index_in_group,
225 def get_conforms_per_frame_batch(arg_bundle):
226 rmf_file, frames, mod_id_start = arg_bundle[:3]
227 resolution, subunit_name, selection, path = arg_bundle[3:]
228 from collections
import defaultdict
230 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
232 result = defaultdict(list)
233 mod_id = mod_id_start
239 molecule=subunit_name)
240 elif selection
is not None:
241 s0 = parse_rmsd_selection(h, selection, resolution)
245 particles = s0.get_selected_particles()
248 for i
in range(len(particles)):
253 pxyz = p.get_coordinates()
254 result[mod_id].append(list(pxyz))
261 symm_groups_file=
None, selection=
None,
262 resolution=1, n_cores=
None):
263 '''Modified RMF coordinates function to work with symmetric copies'''
267 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
268 n_models = [rmf_fh.get_number_of_frames()]
269 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_B))
270 n_models.append(rmf_fh.get_number_of_frames())
291 molecule=subunit_name)
292 elif selection
is not None:
293 s0 = parse_rmsd_selection(h, selection, resolution)
298 for leaf
in s0.get_selected_particles():
303 conform = np.empty([n_models[0]+n_models[1], pts, 3])
308 (symm_groups, group_member_to_symm_group_map,
309 curr_particle_index_in_group, first_group_member) = \
310 parse_symmetric_groups_file(symm_groups_file)
315 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
321 molecule=subunit_name)
322 elif selection
is not None:
323 s0 = parse_rmsd_selection(h, selection, resolution)
326 particles = s0.get_selected_particles()
329 for i
in range(len(particles)):
335 radii.append(p.get_radius())
346 protein_plus_copy = mol_name+
'.'+str(copy_number)
347 if protein_plus_copy
in group_member_to_symm_group_map:
349 group_index = group_member_to_symm_group_map[
351 curr_particle_index_in_group[group_index] += 1
353 if protein_plus_copy \
354 in first_group_member[group_index]:
355 symm_groups[group_index].append([i])
358 j = curr_particle_index_in_group[group_index] \
359 % len(symm_groups[group_index])
360 symm_groups[group_index][j].append(i)
367 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
368 + str(max(residues_in_bead)) +
"_"
374 mol_name +
"_" + residue_in_bead +
"_" +
375 residue_in_bead +
"_" + str(copy_number))
378 for rmf_file
in [rmf_A, rmf_B]:
380 models_name.append(rmf_file)
381 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
383 n_frames = rmf_fh.get_number_of_frames()
384 print(
"Opening RMF file:", rmf_file,
"with",
387 n_cores = min(n_cores, n_frames)
388 n_per_core = n_frames // n_cores
389 spacing = np.arange(0, n_per_core * n_cores, n_per_core)
390 mod_id_starts = spacing + mod_id
391 frame_number_starts = spacing
392 frame_number_ends = spacing + n_per_core
393 frame_number_ends[-1] = n_frames - 1
395 for i
in range(n_cores):
396 a = frame_number_starts[i]
397 b = frame_number_ends[i]
398 frame_lists.append(np.arange(a, b + 1))
400 args_list = [(rmf_file, frame_lists[i], mod_id_starts[i], resolution,
401 subunit_name, selection, path)
for i
in range(n_cores)]
402 results = p.map(get_conforms_per_frame_batch, args_list)
405 conform[m_id, :, :] = np.array(res[m_id])
409 for grp
in symm_groups:
411 print(
"Warning. Symmetry option specified but created "
412 "symmetry group is empty. Cross-check the "
413 "specification of symmetry groups.")
414 check = [len(x)
for x
in grp]
415 message =
"Unequal no. of particles in same symm group."
416 message2 =
"Cross-check the specification of symm groups."
417 assert len(set(check)) == 1, message + message2
419 return ps_names, masses, radii, conform, symm_groups, models_name, n_models
422 def get_rmsds_matrix(conforms, mode, sup, cores, symm_groups=None):
424 if (mode ==
"cpu_serial" and not sup)
or (mode ==
"cpu_omp" and not sup):
425 calculator_name =
"NOSUP_OMP_CALCULATOR"
427 elif mode ==
"cpu_omp" and sup:
428 calculator_name =
"QCP_OMP_CALCULATOR"
429 print(
"we are using QCP_OMP to compute RMSD")
430 elif mode ==
"cuda" and sup:
431 calculator_name =
"QCP_CUDA_MEM_CALCULATOR"
433 print(
"Wrong values to pyRMSD ! Please Fix")
437 print(
"We have ambiguity.")
438 if calculator_name ==
'NOSUP_OMP_CALCULATOR':
440 s1 = parse_symm_groups_for_pyrmsd(symm_groups)
441 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
443 fittingCoordsets=conforms,
444 calculationCoordsets=conforms,
445 calcSymmetryGroups=s1,
446 fitSymmetryGroups=[])
448 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
450 fittingCoordsets=conforms,
451 calcSymmetryGroups=[],
452 fitSymmetryGroups=symm_groups)
455 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
456 calculator_name, conforms)
459 if mode ==
"cpu_omp":
461 calculator.setNumberOfOpenMPThreads(cores)
462 rmsd = calculator.pairwiseRMSDMatrix()
463 elif sys.version_info[0] == 3:
464 if mode ==
"cpu_omp":
466 calculator.setNumberOfOpenMPThreads(1)
469 gen = p.imap(calculator.oneVsFollowing, list(range(len(conforms) - 1)))
475 if mode ==
"cpu_omp":
477 calculator.setNumberOfOpenMPThreads(cores)
479 for i
in range(len(conforms) - 1):
480 rmsd += list(calculator.oneVsFollowing(i))
481 rmsd_matrix = CondensedMatrix(rmsd)
482 inner_data = rmsd_matrix.get_data()
483 np.save(
"Distances_Matrix.data", inner_data)
Select non water and non hydrogen atoms.
A decorator to associate a particle with a part of a protein/DNA/RNA.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
static bool get_is_setup(Model *m, ParticleIndex pi)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
def get_rmfs_coordinates_one_rmf
Modified RMF coordinates function to work with symmetric copies.
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.