"""@namespace IMP.sampcon.scores_convergence
   Utilities to manage distributions of scores."""
from __future__ import print_function

import numpy as np
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Histogram the scores and write the histogram to a text file.

    One line is written per bin: the lower edge of the bin followed by
    the number of scores that fall into it, separated by a space.

    @param scores sequence of score values to histogram
    @param nbins bin specification, forwarded unchanged to
           numpy.histogram as its `bins` argument (an integer bin count,
           or any other form numpy.histogram accepts)
    @param scorecolumn unused; kept so existing callers keep working
    @param hist_ofile path of the output file (overwritten if it exists)
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w') as hist_file:
        # numpy returns one more edge than there are bins; zip() stops at
        # the last count, so each bin is paired with its lower edge.
        # Iterating over the returned arrays (rather than range(nbins))
        # also works when nbins is not a plain integer.
        for lower_edge, count in zip(edges, counts):
            print(lower_edge, count, file=hist_file)
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Write statistics of the minimum score for growing random subsets.

    Subset sizes are 10%, 20%, ..., 100% of the number of scores
    (truncated to an integer). For each size, 150 random subsets are
    drawn without replacement and the minimum score of each is recorded.
    One line "<subset size> <mean of minima> <std of minima>" is written
    per size to the file "<systemname>.Top_Score_Conv.txt". The list of
    subset sizes is also printed to standard output.

    @param scores sequence of score values
    @param scorecolumn unused; kept so existing callers keep working
    @param systemname prefix used to build the output file name
    """
    n_resamples = 150
    n_scores = len(scores)
    list_threshold = [int((factor / 10.0) * n_scores)
                      for factor in range(1, 11)]
    with open("%s.Top_Score_Conv.txt" % systemname, 'w') as f1:
        print("Getting top scoring models at subsets of size:",
              list_threshold)
        for t in list_threshold:
            if t < 1:
                # With fewer than 10 scores the smallest subsets are
                # empty; an empty draw has no minimum, so skip it
                # instead of failing inside numpy.
                continue
            samples = np.array(
                [np.random.choice(scores, t, replace=False).min()
                 for _ in range(n_resamples)])
            print(t, samples.mean(), samples.std(), file=f1)
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Compare two score samples with a two-sample Kolmogorov-Smirnov test.

    Besides running the test, this writes three files whose names start
    with `systemname`: a histogram of each sample
    ("<systemname>.Score_Hist_A.txt", "<systemname>.Score_Hist_B.txt")
    and the test result ("<systemname>.KS_Test.txt", a single line
    holding the statistic and the p-value).

    @param score_A first sample of scores
    @param score_B second sample of scores
    @param nbins bin specification passed on to get_scores_distribution
    @param systemname prefix used to build the output file names
    @return the pair (KS statistic, p-value) as returned by
            scipy.stats.ks_2samp
    """
    # Imported inside the function, so SciPy is only required when the
    # KS test is actually requested.
    from scipy.stats import ks_2samp

    ks_statistic, ks_pvalue = ks_2samp(score_A, score_B)

    histogram_jobs = (
        (score_A, "%s.Score_Hist_A.txt" % systemname),
        (score_B, "%s.Score_Hist_B.txt" % systemname),
    )
    for sample, hist_path in histogram_jobs:
        # The score-column argument is passed as 0, as for both samples
        # in the direct calls this loop replaces.
        get_scores_distribution(sample, nbins, 0, hist_path)

    ks_path = "%s.KS_Test.txt" % systemname
    with open(ks_path, 'w+') as ks_file:
        print(ks_statistic, ks_pvalue, file=ks_file)

    return ks_statistic, ks_pvalue