1 from __future__
import print_function
2 import pyRMSD.RMSDCalculator
3 from pyRMSD.condensedMatrix
import CondensedMatrix
13 import multiprocessing
as mp
14 from itertools
import combinations
17 def parse_symm_groups_for_pyrmsd(s):
24 for j, k
in combinations(np.arange(n), 2):
25 sub_grp = np.array(grp)[:, np.array([j, k])]
26 output.append(sub_grp.tolist())
30 def parse_rmsd_selection(h, selection, resolution=1):
32 for domain_list
in selection.values():
38 for domain
in domain_list:
40 start_res = int(domain[0])
41 end_res = int(domain[1])
43 prot_plus_copy = domain[2]
45 if "." in prot_plus_copy:
46 copy_number = int(prot_plus_copy.split(
".")[1])
47 prot_name = prot_plus_copy.split(
".")[0]
51 prot_name = prot_plus_copy
55 copy_index=copy_number,
56 residue_indexes=range(start_res, end_res+1))
65 def get_pdbs_coordinates(path, idfile_A, idfile_B):
74 with open(idfile_A,
'w+')
as f1:
75 for str_file
in sorted(
76 glob.glob(
"%s/sample_A/*.pdb" % path),
77 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
78 print(str_file, num, file=f1)
79 models_name.append(str_file)
93 with open(idfile_B,
'w+')
as f2:
94 for str_file
in sorted(
95 glob.glob(
"%s/sample_B/*.pdb" % path),
96 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
97 print(str_file, num, file=f2)
98 models_name.append(str_file)
109 return np.array(conform), masses, radii, models_name
112 def get_rmfs_coordinates(path, idfile_A, idfile_B,
113 subunit_name=
None, selection=
None, resolution=1):
121 f1 = open(idfile_A,
'w+')
122 f2 = open(idfile_B,
'w+')
126 for sample_name, sample_id_file
in zip([
'A',
'B'], [f1, f2]):
128 for str_file
in sorted(
129 glob.glob(
"%s/sample_%s/*.rmf3" % (path, sample_name)),
130 key=
lambda x: int(x.split(
'/')[-1].split(
'.')[0])):
131 print(str_file, num, file=sample_id_file)
132 models_name.append(str_file)
135 inf = RMF.open_rmf_file_read_only(str_file)
143 molecule=subunit_name)
144 elif selection
is not None:
145 s0 = parse_rmsd_selection(h, selection, resolution)
149 for leaf
in s0.get_selected_particles():
152 pts.append([p.get_coordinates()[i]
for i
in range(3)])
154 if num == 0
and sample_name ==
'A':
156 radii.append(p.get_radius())
170 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
171 + str(max(residues_in_bead)) +
"_"
178 ps_names.append(mol_name +
"_" + residue_in_bead +
"_"
179 + residue_in_bead +
"_"
188 return ps_names, masses, radii, np.array(conform), models_name
191 def parse_symmetric_groups_file(symm_groups_file):
193 member_to_symm_group = {}
194 first_group_member = []
195 curr_particle_index_in_group = []
197 sgf = open(symm_groups_file,
'r')
199 for indx, ln
in enumerate(sgf.readlines()):
201 symm_groups.append([])
203 curr_particle_index_in_group.append(-1)
204 fields = ln.strip().split()
208 member_to_symm_group[fld] = indx
212 first_group_member.append(fields[0])
216 return (symm_groups, member_to_symm_group, curr_particle_index_in_group,
220 def get_conforms_per_frame_batch(arg_bundle):
221 rmf_file, frames, mod_id_start = arg_bundle[:3]
222 resolution, subunit_name, selection, path = arg_bundle[3:]
223 from collections
import defaultdict
225 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
227 result = defaultdict(list)
228 mod_id = mod_id_start
234 molecule=subunit_name)
235 elif selection
is not None:
236 s0 = parse_rmsd_selection(h, selection, resolution)
240 particles = s0.get_selected_particles()
243 for i
in range(len(particles)):
248 pxyz = p.get_coordinates()
249 result[mod_id].append(list(pxyz))
254 def get_rmfs_coordinates_one_rmf(path, rmf_A, rmf_B,
256 symm_groups_file=
None, selection=
None,
257 resolution=1, n_cores=
None):
258 '''Modified RMF coordinates function to work with symmetric copies'''
262 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
263 n_models = [rmf_fh.get_number_of_frames()]
264 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_B))
265 n_models.append(rmf_fh.get_number_of_frames())
286 molecule=subunit_name)
287 elif selection
is not None:
288 s0 = parse_rmsd_selection(h, selection, resolution)
293 for leaf
in s0.get_selected_particles():
298 conform = np.empty([n_models[0]+n_models[1], pts, 3])
303 (symm_groups, group_member_to_symm_group_map,
304 curr_particle_index_in_group, first_group_member) = \
305 parse_symmetric_groups_file(symm_groups_file)
310 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_A))
316 molecule=subunit_name)
317 elif selection
is not None:
318 s0 = parse_rmsd_selection(h, selection, resolution)
321 particles = s0.get_selected_particles()
324 for i
in range(len(particles)):
330 radii.append(p.get_radius())
341 protein_plus_copy = mol_name+
'.'+str(copy_number)
342 if protein_plus_copy
in group_member_to_symm_group_map:
344 group_index = group_member_to_symm_group_map[
346 curr_particle_index_in_group[group_index] += 1
348 if protein_plus_copy \
349 == first_group_member[group_index]:
350 symm_groups[group_index].append([i])
353 j = curr_particle_index_in_group[group_index] \
354 % len(symm_groups[group_index])
355 symm_groups[group_index][j].append(i)
362 mol_name +
"_" + str(min(residues_in_bead)) +
"_"
363 + str(max(residues_in_bead)) +
"_"
369 mol_name +
"_" + residue_in_bead +
"_" +
370 residue_in_bead +
"_" + str(copy_number))
373 for rmf_file
in [rmf_A, rmf_B]:
375 models_name.append(rmf_file)
376 rmf_fh = RMF.open_rmf_file_read_only(os.path.join(path, rmf_file))
378 n_frames = rmf_fh.get_number_of_frames()
379 print(
"Opening RMF file:", rmf_file,
"with",
382 n_cores = min(n_cores, n_frames)
383 n_per_core = n_frames // n_cores
384 spacing = np.arange(0, n_per_core * n_cores, n_per_core)
385 mod_id_starts = spacing + mod_id
386 frame_number_starts = spacing
387 frame_number_ends = spacing + n_per_core
388 frame_number_ends[-1] = n_frames - 1
390 for i
in range(n_cores):
391 a = frame_number_starts[i]
392 b = frame_number_ends[i]
393 frame_lists.append(np.arange(a, b + 1))
395 args_list = [(rmf_file, frame_lists[i], mod_id_starts[i], resolution,
396 subunit_name, selection, path)
for i
in range(n_cores)]
397 results = p.map(get_conforms_per_frame_batch, args_list)
400 conform[m_id, :, :] = np.array(res[m_id])
404 for grp
in symm_groups:
406 print(
"Warning. Symmetry option specified but created "
407 "symmetry group is empty. Cross-check the "
408 "specification of symmetry groups.")
410 return ps_names, masses, radii, conform, symm_groups, models_name, n_models
413 def get_rmsds_matrix(conforms, mode, sup, cores, symm_groups=None):
415 if (mode ==
"cpu_serial" and not sup)
or (mode ==
"cpu_omp" and not sup):
416 calculator_name =
"NOSUP_OMP_CALCULATOR"
418 elif mode ==
"cpu_omp" and sup:
419 calculator_name =
"QCP_OMP_CALCULATOR"
420 print(
"we are using QCP_OMP to compute RMSD")
421 elif mode ==
"cuda" and sup:
422 calculator_name =
"QCP_CUDA_MEM_CALCULATOR"
424 print(
"Wrong values to pyRMSD ! Please Fix")
428 print(
"We have ambiguity.")
429 s1 = parse_symm_groups_for_pyrmsd(symm_groups)
430 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
432 fittingCoordsets=conforms,
433 calcSymmetryGroups=s1,
434 fitSymmetryGroups=s1)
437 calculator = pyRMSD.RMSDCalculator.RMSDCalculator(
438 calculator_name, conforms)
441 if mode ==
"cpu_omp":
442 calculator.setNumberOfOpenMPThreads(int(cores))
445 rmsd = calculator.pairwiseRMSDMatrix()
448 for i
in range(len(conforms) - 1):
449 rmsd += list(calculator.oneVsFollowing(i))
450 rmsd_matrix = CondensedMatrix(rmsd)
451 inner_data = rmsd_matrix.get_data()
452 np.save(
"Distances_Matrix.data", inner_data)
Select non water and non hydrogen atoms.
A decorator to associate a particle with a part of a protein/DNA/RNA.
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
double get_mass(ResidueType c)
Get the mass from the residue type.
GenericHierarchies get_leaves(Hierarchy mhd)
Get all the leaves of the bit of hierarchy.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
static bool get_is_setup(Model *m, ParticleIndex pi)
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
std::string get_molecule_name(Hierarchy h)
A decorator for a residue.
int get_copy_index(Hierarchy h)
Walk up the hierarchy to find the current copy index.
Functionality for loading, creating, manipulating and scoring atomic structures.
Select hierarchy particles identified by the biological name.
Support for the RMF file format for storing hierarchical molecular data and markup.
A decorator for a particle with x,y,z coordinates and a radius.