1 from __future__
import print_function
2 import pyRMSD.RMSDCalculator
3 from pyRMSD.condensedMatrix
import CondensedMatrix
13 import multiprocessing
as mp
15 from itertools
import combinations
18 def parse_symm_groups_for_pyrmsd(s):
25 for j, k
in combinations(np.arange(n), 2):
26 sub_grp = np.array(grp)[:, np.array([j, k])]
27 output.append(sub_grp.tolist())
31 def parse_rmsd_selection(h, selection, resolution=1):
33 for domain_list
in selection.values():
39 for domain
in domain_list:
41 start_res = int(domain[0])
42 end_res = int(domain[1])
44 prot_plus_copy = domain[2]
46 if "." in prot_plus_copy:
47 copy_number = int(prot_plus_copy.split(
".")[1])
48 prot_name = prot_plus_copy.split(
".")[0]
52 prot_name = prot_plus_copy
56 copy_index=copy_number,
57 residue_indexes=range(start_res, end_res+1))
66 def get_pdbs_coordinates(path, idfile_A, idfile_B):
75 with open(idfile_A,
'w+')
as f1:
76 for str_file
in sorted(
77 glob.glob(
"%s/sample_A/*.pdb" % path),
78 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
79 print(str_file, num, file=f1)
80 models_name.append(str_file)
94 with open(idfile_B,
'w+')
as f2:
95 for str_file
in sorted(
96 glob.glob(
"%s/sample_B/*.pdb" % path),
97 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
98 print(str_file, num, file=f2)
99 models_name.append(str_file)
110 return np.array(conform), masses, radii, models_name
113 def get_rmfs_coordinates(path, idfile_A, idfile_B,
114 subunit_name=
None, selection=
None, resolution=1):
122 f1 = open(idfile_A,
'w+')
123 f2 = open(idfile_B,
'w+')
127 for sample_name, sample_id_file
in zip([
'A',
'B'], [f1, f2]):
129 for str_file
in sorted(
130 glob.glob(
"%s/sample_%s/*.rmf3" % (path, sample_name)),
131 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
132 print(str_file, num, file=sample_id_file)
133 models_name.append(str_file)
136 inf = RMF.open_rmf_file_read_only(str_file)
144 molecule=subunit_name)
145 elif selection
is not None:
146 s0 = parse_rmsd_selection(h, selection, resolution)
150 for leaf
in s0.get_selected_particles():
153 pts.append([p.get_coordinates()[i]
for i
in range(3)])
155 if num == 0
and sample_name ==
'A':
157 radii.append(p.get_radius())
171 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
172 + str(max(residues_in_bead)) +
"_"
179 ps_names.append(mol_name +
"_" + residue_in_bead +
"_"
180 + residue_in_bead +
"_"
189 return ps_names, masses, radii, np.array(conform), models_name
192 def parse_symmetric_groups_file(symm_groups_file):
194 member_to_symm_group = {}
195 first_group_member = []
196 curr_particle_index_in_group = []
198 sgf = open(symm_groups_file,
'r')
200 for indx, ln
in enumerate(sgf.readlines()):
202 symm_groups.append([])
204 curr_particle_index_in_group.append(-1)
205 fields = ln.strip().split()
209 for subfld
in fld.split(
'/'):
210 member_to_symm_group[subfld] = indx
214 first_group_member.append(fields[0].split(
'/'))
218 return (symm_groups, member_to_symm_group, curr_particle_index_in_group,
222 def get_conforms_per_frame_batch(arg_bundle):
223 rmf_file, frames, mod_id_start = arg_bundle[:3]
224 resolution, subunit_name, selection, path = arg_bundle[3:]
225 from collections
import defaultdict
227 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
229 result = defaultdict(list)
230 mod_id = mod_id_start
236 molecule=subunit_name)
237 elif selection
is not None:
238 s0 = parse_rmsd_selection(h, selection, resolution)
242 particles = s0.get_selected_particles()
245 for i
in range(len(particles)):
250 pxyz = p.get_coordinates()
251 result[mod_id].append(list(pxyz))
256 def get_rmfs_coordinates_one_rmf(path, rmf_A, rmf_B,
258 symm_groups_file=
None, selection=
None,
259 resolution=1, n_cores=
None):
260 '''Modified RMF coordinates function to work with symmetric copies'''
264 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
265 n_models = [rmf_fh.get_number_of_frames()]
266 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_B))
267 n_models.append(rmf_fh.get_number_of_frames())
288 molecule=subunit_name)
289 elif selection
is not None:
290 s0 = parse_rmsd_selection(h, selection, resolution)
295 for leaf
in s0.get_selected_particles():
300 conform = np.empty([n_models[0]+n_models[1], pts, 3])
305 (symm_groups, group_member_to_symm_group_map,
306 curr_particle_index_in_group, first_group_member) = \
307 parse_symmetric_groups_file(symm_groups_file)
312 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
318 molecule=subunit_name)
319 elif selection
is not None:
320 s0 = parse_rmsd_selection(h, selection, resolution)
323 particles = s0.get_selected_particles()
326 for i
in range(len(particles)):
332 radii.append(p.get_radius())
343 protein_plus_copy = mol_name+
'.'+str(copy_number)
344 if protein_plus_copy
in group_member_to_symm_group_map:
346 group_index = group_member_to_symm_group_map[
348 curr_particle_index_in_group[group_index] += 1
350 if protein_plus_copy \
351 in first_group_member[group_index]:
352 symm_groups[group_index].append([i])
355 j = curr_particle_index_in_group[group_index] \
356 % len(symm_groups[group_index])
357 symm_groups[group_index][j].append(i)
364 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
365 + str(max(residues_in_bead)) +
"_"
371 mol_name +
"_" + residue_in_bead +
"_" +
372 residue_in_bead +
"_" + str(copy_number))
375 for rmf_file
in [rmf_A, rmf_B]:
377 models_name.append(rmf_file)
378 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
380 n_frames = rmf_fh.get_number_of_frames()
381 print(
"Opening RMF file:", rmf_file,
"with",
384 n_cores = min(n_cores, n_frames)
385 n_per_core = n_frames // n_cores
386 spacing = np.arange(0, n_per_core * n_cores, n_per_core)
387 mod_id_starts = spacing + mod_id
388 frame_number_starts = spacing
389 frame_number_ends = spacing + n_per_core
390 frame_number_ends[-1] = n_frames - 1
392 for i
in range(n_cores):
393 a = frame_number_starts[i]
394 b = frame_number_ends[i]
395 frame_lists.append(np.arange(a, b + 1))
397 args_list = [(rmf_file, frame_lists[i], mod_id_starts[i], resolution,
398 subunit_name, selection, path)
for i
in range(n_cores)]
399 results = p.map(get_conforms_per_frame_batch, args_list)
402 conform[m_id, :, :] = np.array(res[m_id])
406 for grp
in symm_groups:
408 print(
"Warning. Symmetry option specified but created "
409 "symmetry group is empty. Cross-check the "
410 "specification of symmetry groups.")
411 check = [len(x)
for x
in grp]
412 message =
"Unequal no. of particles in same symm group."
413 message2 =
"Cross-check the specification of symm groups."
414 assert len(set(check)) == 1, message + message2
416 return ps_names, masses, radii, conform, symm_groups, models_name, n_models
419 def get_rmsds_matrix(conforms, mode, sup, cores, symm_groups=None):
421 if (mode ==
"cpu_serial" and not sup)
or (mode ==
"cpu_omp" and not sup):
422 calculator_name =
"NOSUP_OMP_CALCULATOR"
424 elif mode ==
"cpu_omp" and sup:
425 calculator_name =
"QCP_OMP_CALCULATOR"
426 print(
"we are using QCP_OMP to compute RMSD")
427 elif mode ==
"cuda" and sup:
428 calculator_name =
"QCP_CUDA_MEM_CALCULATOR"
430 print(
"Wrong values to pyRMSD ! Please Fix")
434 print(
"We have ambiguity.")
435 if calculator_name ==
'NOSUP_OMP_CALCULATOR':
437 s1 = parse_symm_groups_for_pyrmsd(symm_groups)
438 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
440 fittingCoordsets=conforms,
441 calculationCoordsets=conforms,
442 calcSymmetryGroups=s1,
443 fitSymmetryGroups=[])
445 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
447 fittingCoordsets=conforms,
448 calcSymmetryGroups=[],
449 fitSymmetryGroups=symm_groups)
452 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
453 calculator_name, conforms)
456 if mode ==
"cpu_omp":
458 calculator.setNumberOfOpenMPThreads(cores)
459 rmsd = calculator.pairwiseRMSDMatrix()
460 elif sys.version_info[0] == 3:
461 if mode ==
"cpu_omp":
463 calculator.setNumberOfOpenMPThreads(1)
466 gen = p.imap(calculator.oneVsFollowing, list(range(len(conforms) - 1)))
472 if mode ==
"cpu_omp":
474 calculator.setNumberOfOpenMPThreads(cores)
476 for i
in range(len(conforms) - 1):
477 rmsd += list(calculator.oneVsFollowing(i))
478 rmsd_matrix = CondensedMatrix(rmsd)
479 inner_data = rmsd_matrix.get_data()
480 np.save(
"Distances_Matrix.data", inner_data)
Select non water and non hydrogen atoms.
A decorator to associate a particle with a part of a protein/DNA/RNA.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
static bool get_is_setup(Model *m, ParticleIndex pi)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.