1 from __future__
import print_function
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Write a histogram of ``scores`` to a text file.

    The histogram is computed with ``np.histogram`` and written one bin per
    line as ``<left bin edge> <count>``, separated by a single space.

    Args:
        scores: Values to histogram; passed straight to ``np.histogram``.
        nbins: Bin specification forwarded as ``bins=`` to ``np.histogram``
            (an integer bin count, or anything else that function accepts,
            such as an explicit sequence of bin edges).
        scorecolumn: Unused; kept so existing callers keep working.
        hist_ofile: Path of the output file (created or truncated).
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w+') as f1:
        # ``edges`` has one more entry than ``counts``; zip() stops at the
        # shorter sequence, so only the left edge of every bin is written.
        for left_edge, count in zip(edges, counts):
            print(left_edge, count, file=f1)
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Write statistics of the minimum score for growing random subsets.

    Ten subset sizes are used: 10%, 20%, ..., 100% of ``len(scores)``
    (each truncated to an integer). For every size, 150 random subsets are
    drawn without replacement, the minimum score of each subset is taken,
    and the mean and standard deviation of those minima are written to
    ``<systemname>.Top_Score_Conv.txt`` as ``<size> <mean> <std>``.

    Args:
        scores: 1-D collection of scores to sample from.
        scorecolumn: Unused; kept so existing callers keep working.
        systemname: Prefix of the output file name.
    """
    n_scores = len(scores)
    # Same float expression as before so the truncated sizes are unchanged.
    list_threshold = [int((factor / 10.0) * n_scores)
                      for factor in range(1, 11)]

    with open("%s.Top_Score_Conv.txt" % systemname, 'w+') as f1:
        print("Getting top scoring models at subsets of size:",
              list_threshold)

        for t in list_threshold:
            if t < 1:
                # With fewer than ten scores the smallest subsets are empty
                # and have no minimum; skip them instead of failing.
                continue
            samples = np.array([
                np.random.choice(scores, t, replace=False).min()
                for _ in range(150)
            ])
            print(t, samples.mean(), samples.std(), file=f1)
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Compare two score samples with a two-sample Kolmogorov-Smirnov test.

    Side effects: writes a histogram of each sample via
    ``get_scores_distribution`` to ``<systemname>.Score_Hist_A.txt`` and
    ``<systemname>.Score_Hist_B.txt``, and writes the KS statistic and
    p-value (space separated, one line) to ``<systemname>.KS_Test.txt``.

    Args:
        score_A: First sample of scores.
        score_B: Second sample of scores.
        nbins: Bin specification forwarded to ``get_scores_distribution``.
        systemname: Prefix used for all three output file names.

    Returns:
        Tuple ``(d_stat, p_value)`` as returned by ``scipy.stats.ks_2samp``.
    """
    # Imported locally, as in the original, so scipy is only required when
    # this function is actually called.
    from scipy.stats import ks_2samp

    d_stat, p_value = ks_2samp(score_A, score_B)

    # The third argument (scorecolumn) is passed as 0, as before.
    get_scores_distribution(score_A, nbins, 0,
                            "%s.Score_Hist_A.txt" % systemname)
    get_scores_distribution(score_B, nbins, 0,
                            "%s.Score_Hist_B.txt" % systemname)

    with open("%s.KS_Test.txt" % systemname, 'w+') as f1:
        print(d_stat, p_value, file=f1)
    return d_stat, p_value