1 from __future__
import print_function
2 import os,sys,random,numpy,math
6 from scipy
import spatial
9 import pyRMSD.RMSDCalculator
10 from pyRMSD.matrixHandler
import MatrixHandler
11 from pyRMSD.condensedMatrix
import CondensedMatrix
14 def get_sample_identity(idfile_A, idfile_B):
19 with open(idfile_A,
'r') as f:
21 mod = line.split(
"/")[-1]
22 sampleA_models.append(int(mod.strip(
"\n").split(
" ")[1]))
25 with open(idfile_B,
'r') as f:
27 mod = line.split(
"/")[-1]
28 sampleB_models.append(int(mod.strip(
"\n").split(
" ")[1]))
29 return sampleA_models, sampleB_models
32 def get_cutoffs_list(distmat, gridSize):
34 mindist = distmat.min()
35 maxdist = distmat.max()
37 print(
"Minimum and maximum pairwise model distances:",mindist, maxdist)
38 cutoffs=numpy.arange(mindist+gridSize,maxdist,gridSize)
42 def precision_cluster(distmat,numModels,rmsd_cutoff):
45 for count
in range(numModels):
46 neighbors.append([count])
48 inds = numpy.argwhere(distmat <= rmsd_cutoff)
54 neighbors[i].append(j)
55 neighbors[j].append(i)
60 for i
in range(numModels):
62 boolUnclustered.append(
True)
67 while len(unclustered)>0:
71 for eachu
in unclustered:
72 if len(neighbors[eachu])>max_neighbors:
73 max_neighbors=len(neighbors[eachu])
77 cluster_centers.append(currcenter)
78 cluster_members.append([n
for n
in neighbors[currcenter]])
81 for n
in neighbors[currcenter]:
84 boolUnclustered[n]=
False
86 for n
in neighbors[currcenter]:
87 for unn
in neighbors[n]:
88 if not boolUnclustered[unn]:
90 neighbors[unn].remove(n)
92 return cluster_centers, cluster_members
94 def get_contingency_table(num_clusters,cluster_members,all_models,run1_models,run2_models):
95 full_ctable=numpy.zeros((num_clusters,2))
97 for ic,cluster
in enumerate(cluster_members):
98 for member
in cluster:
99 model_index=all_models[member]
101 if model_index
in run1_models:
103 full_ctable[ic][0]+=1.0
104 elif model_index
in run2_models:
106 full_ctable[ic][1]+=1.0
109 numModelsRun1 = float(numpy.sum(full_ctable,axis=0)[0])
110 numModelsRun2 = float(numpy.sum(full_ctable,axis=0)[1])
115 for i
in range(num_clusters):
116 if full_ctable[i][0]<=10.0
or full_ctable[i][1]<=10.0:
119 reduced_ctable.append([full_ctable[i][0],full_ctable[i][1]])
120 retained_clusters.append(i)
121 return numpy.array(reduced_ctable),retained_clusters
123 def test_sampling_convergence(contingency_table,total_num_models):
124 if len(contingency_table)==0:
127 ct = numpy.transpose(contingency_table)
128 [chisquare,pvalue,dof,expected]=scipy.stats.chi2_contingency(ct)
132 cramersv=math.sqrt(chisquare/float(total_num_models))
134 return(pvalue,cramersv)
136 def percent_ensemble_explained(ctable,total_num_models):
139 percent_clustered=float(numpy.sum(ctable,axis=0)[0]+numpy.sum(ctable,axis=0)[1])*100.0/float(total_num_models)
140 return percent_clustered
142 def get_clusters(cutoffs_list, distmat_full, all_models, total_num_models, run1_all_models, run2_all_models, sysname):
147 with open(
"%s.ChiSquare_Grid_Stats.txt" % sysname,
'w+')
as f1:
148 for c
in cutoffs_list:
149 cluster_centers, cluster_members = precision_cluster(
150 distmat_full, total_num_models, c)
151 ctable, retained_clusters = get_contingency_table(
152 len(cluster_centers), cluster_members, all_models,
153 run1_all_models, run2_all_models)
154 pval, cramersv = test_sampling_convergence(ctable, total_num_models)
155 percent_explained = percent_ensemble_explained(
156 ctable, total_num_models)
160 percents.append(percent_explained)
162 print(c, pval, cramersv, percent_explained, file=f1)
164 return pvals, cvs, percents
166 def get_sampling_precision(cutoffs_list, pvals, cvs, percents):
167 sampling_precision=max(cutoffs_list)
169 cramersv_converged=1.0
170 percent_converged=0.0
172 for i
in range(len(cutoffs_list)):
174 if pvals[i]>0.05
or cvs[i]<0.10:
175 if sampling_precision>cutoffs_list[i]:
176 sampling_precision=cutoffs_list[i]
177 pval_converged=pvals[i]
178 cramersv_converged=cvs[i]
179 percent_converged=percents[i]
181 sampling_precision=max(cutoffs_list)
183 return sampling_precision,pval_converged,cramersv_converged,percent_converged