IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
scores_convergence.py
1 from __future__ import print_function
2 import numpy as np
3 
4 
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Write a histogram of ``scores`` to ``hist_ofile`` as plain text.

    Each output line holds the left edge of one bin followed by the number
    of scores that fell into that bin, separated by a single space.

    Args:
        scores: Values to histogram; passed unchanged to ``np.histogram``.
        nbins: Bin specification forwarded to ``np.histogram`` as ``bins``.
            Usually a bin count, but anything ``np.histogram`` accepts
            (for example a sequence of bin edges) works as well.
        scorecolumn: Unused; kept so existing callers keep working.
        hist_ofile: Path of the text file to create or overwrite.
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w') as out:
        # ``edges`` has one more entry than ``counts``; zip() pairs every
        # count with its left edge and drops the final right edge.
        # Iterating over the results (instead of range(nbins)) keeps this
        # working when ``nbins`` is not a plain integer.
        for left_edge, count in zip(edges, counts):
            print(left_edge, count, file=out)
12 
13 
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Write minimum-score statistics for random subsets of growing size.

    Subset sizes are 10%, 20%, ..., 100% of ``len(scores)`` (truncated to
    an integer).  For each size, 150 random subsets are drawn without
    replacement and the minimum score of each subset is taken.  One line
    per subset size is written to ``<systemname>.Top_Score_Conv.txt``
    holding the subset size followed by the mean and standard deviation of
    those 150 minima.  The list of subset sizes is also printed to stdout.

    Args:
        scores: Sequence of scores to sample from.
        scorecolumn: Unused; kept so existing callers keep working.
        systemname: Prefix used to build the output file name.
    """
    n_scores = len(scores)
    list_threshold = [int((factor / 10.0) * n_scores)
                      for factor in range(1, 11)]
    with open("%s.Top_Score_Conv.txt" % systemname, 'w') as f1:
        print("Getting top scoring models at subsets of size:",
              list_threshold)

        for t in list_threshold:
            if t == 0:
                # With fewer than ten scores the smallest subsets are
                # empty; an empty subset has no minimum, so skip it
                # rather than let ``.min()`` raise ValueError.
                continue
            samples = np.array([
                np.random.choice(scores, t, replace=False).min()
                for _ in range(150)])
            print(t, samples.mean(), samples.std(), file=f1)
25 
26 
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Run a two-sample Kolmogorov-Smirnov test on two score samples.

    Besides computing the test, three text files prefixed with
    ``systemname`` are written: a histogram of each sample
    (``.Score_Hist_A.txt`` and ``.Score_Hist_B.txt``, produced by
    ``get_scores_distribution``) and the test result (``.KS_Test.txt``,
    one line with the statistic and the p-value).

    Args:
        score_A: First sample of scores.
        score_B: Second sample of scores.
        nbins: Bin specification handed to ``get_scores_distribution``.
        systemname: Prefix used to build the three output file names.

    Returns:
        Tuple of the KS statistic and the p-value from
        ``scipy.stats.ks_2samp``.
    """
    from scipy.stats import ks_2samp

    ks_statistic, ks_pvalue = ks_2samp(score_A, score_B)

    # Histogram both samples with the same binning specification.
    histogram_jobs = (
        (score_A, "%s.Score_Hist_A.txt" % systemname),
        (score_B, "%s.Score_Hist_B.txt" % systemname),
    )
    for sample, hist_path in histogram_jobs:
        get_scores_distribution(sample, nbins, 0, hist_path)

    ks_path = "%s.KS_Test.txt" % systemname
    with open(ks_path, 'w+') as ks_file:
        print(ks_statistic, ks_pvalue, file=ks_file)

    return ks_statistic, ks_pvalue