IMP logo
IMP Reference Guide  develop.234970c887,2024/04/29
The Integrative Modeling Platform
scores_convergence.py
1 """@namespace IMP.sampcon.scores_convergence
2  Utilities to manage distributions of scores."""
3 
4 from __future__ import print_function
5 import numpy as np
6 
7 
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Write a histogram of `scores` to a text file.

    The scores are binned with numpy.histogram and one line is written
    per bin, containing the left edge of the bin and the number of scores
    that fell into it, separated by a space.

    @param scores values to histogram (anything numpy.histogram accepts)
    @param nbins bin specification forwarded to numpy.histogram as `bins`:
           a number of equal-width bins, a sequence of bin edges, or the
           name of a binning rule
    @param scorecolumn not used by this function; kept in the signature
           so that existing callers keep working
    @param hist_ofile path of the output file; overwritten if it exists
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w') as hist_file:
        # There is one more edge than there are bins. zip() stops at the
        # shorter sequence (counts), so the final right-hand edge is
        # dropped and the row count always equals the bin count, whatever
        # form `nbins` took.
        for left_edge, count in zip(edges, counts):
            print(left_edge, count, file=hist_file)
15 
16 
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Write statistics of the best score found in random subsets.

    Subset sizes are 10%, 20%, ..., 100% of the number of scores (rounded
    down). For each size, 150 random subsets are drawn without replacement
    and the minimum score of each subset is recorded. One line per size,
    "size mean std" of those minima, is written to
    "<systemname>.Top_Score_Conv.txt". The list of sizes is also printed
    to standard output.

    @param scores sequence of scores to subsample
    @param scorecolumn not used by this function; kept in the signature
           so that existing callers keep working
    @param systemname prefix of the output file name
    """
    num_repeats = 150
    num_scores = len(scores)
    subset_sizes = [int((factor / 10.0) * num_scores)
                    for factor in range(1, 11)]
    # With fewer than 10 scores the smallest fractions round down to zero.
    # An empty sample has no minimum (numpy raises ValueError), so those
    # sizes are skipped rather than aborting after the file was truncated.
    subset_sizes = [size for size in subset_sizes if size > 0]

    with open("%s.Top_Score_Conv.txt" % systemname, 'w') as conv_file:
        print("Getting top scoring models at subsets of size:",
              subset_sizes)

        for size in subset_sizes:
            best_scores = np.array(
                [np.random.choice(scores, size, replace=False).min()
                 for _ in range(num_repeats)])
            print(size, best_scores.mean(), best_scores.std(),
                  file=conv_file)
28 
29 
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Compare two score samples with a two-sample KS test.

    Runs scipy.stats.ks_2samp on the two samples, writes a histogram of
    each sample to "<systemname>.Score_Hist_A.txt" and
    "<systemname>.Score_Hist_B.txt" via get_scores_distribution(), and
    writes the two values returned by the test to
    "<systemname>.KS_Test.txt".

    @param score_A first sample of scores
    @param score_B second sample of scores
    @param nbins bin specification forwarded to get_scores_distribution()
    @param systemname prefix of all output file names
    @return the two values unpacked from ks_2samp, as a tuple
    """
    # Imported here rather than at module level, so scipy is only needed
    # when this function is actually called.
    from scipy.stats import ks_2samp

    statistic, pvalue = ks_2samp(score_A, score_B)

    histogram_outputs = (
        (score_A, "%s.Score_Hist_A.txt" % systemname),
        (score_B, "%s.Score_Hist_B.txt" % systemname),
    )
    for sample, out_path in histogram_outputs:
        get_scores_distribution(sample, nbins, 0, out_path)

    with open("%s.KS_Test.txt" % systemname, 'w+') as ks_file:
        print(statistic, pvalue, file=ks_file)

    return statistic, pvalue