1 from __future__
import print_function
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Write a histogram of ``scores`` to a text file.

    The histogram is computed with ``np.histogram(scores, bins=nbins)`` and
    written one bin per line as ``<left bin edge> <count>``.

    Args:
        scores: Values to histogram (anything ``np.histogram`` accepts).
        nbins: Passed through as the ``bins`` argument of ``np.histogram``.
        scorecolumn: Unused by this function; kept for interface
            compatibility with existing callers.
        hist_ofile: Path of the output file; it is created or truncated.
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w+') as f1:
        # np.histogram returns one more edge than counts; zip stops at the
        # last count, so every row pairs a bin's left edge with its count.
        for left_edge, count in zip(edges, counts):
            print(left_edge, count, file=f1)
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Estimate how the best (minimum) score varies with subset size.

    For subset sizes equal to 10%, 20%, ..., 100% of ``len(scores)``, draw
    150 random subsets without replacement, record the minimum score of each
    subset, and write ``<size> <mean of minima> <std of minima>`` as one line
    of ``"<systemname>.Top_Score_Conv.txt"``.

    Args:
        scores: Sequence/array of scores to sample from.
        scorecolumn: Unused by this function; kept for interface
            compatibility with existing callers.
        systemname: Prefix of the output file name.
    """
    n_repeats = 150
    list_threshold = [int((factor / 10.0) * len(scores))
                      for factor in range(1, 11)]

    with open("%s.Top_Score_Conv.txt" % systemname, 'w+') as f1:
        print("Getting top scoring models at subsets of size:", list_threshold)

        for t in list_threshold:
            if t < 1:
                # With fewer than 10 scores the smallest fractions round down
                # to an empty subset, whose minimum is undefined.
                continue
            samples = np.array([
                np.random.choice(scores, t, replace=False).min()
                for _ in range(n_repeats)
            ])
            print(t, samples.mean(), samples.std(), file=f1)
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Compare two score samples with a two-sample Kolmogorov-Smirnov test.

    Besides running the test, this writes three files prefixed with
    ``systemname``: a histogram of each sample (``.Score_Hist_A.txt`` and
    ``.Score_Hist_B.txt``, produced by ``get_scores_distribution``) and the
    test result (``.KS_Test.txt``, one line: ``<statistic> <p-value>``).

    Args:
        score_A: First sample of scores.
        score_B: Second sample of scores.
        nbins: Bin specification forwarded to ``get_scores_distribution``.
        systemname: Prefix used for every output file name.

    Returns:
        The ``(d_stat, p_value)`` pair returned by ``scipy.stats.ks_2samp``.
    """
    from scipy.stats import ks_2samp

    # Run the test first so a failure here leaves no output files behind.
    ks_result = ks_2samp(score_A, score_B)
    d_stat, p_value = ks_result

    histogram_jobs = (
        (score_A, "%s.Score_Hist_A.txt"),
        (score_B, "%s.Score_Hist_B.txt"),
    )
    for sample, name_template in histogram_jobs:
        get_scores_distribution(sample, nbins, 0, name_template % systemname)

    ks_path = "%s.KS_Test.txt" % systemname
    with open(ks_path, 'w+') as f1:
        print(d_stat, p_value, file=f1)

    return d_stat, p_value