IMP logo
IMP Reference Guide  2.14.0
The Integrative Modeling Platform
scores_convergence.py
1 from __future__ import print_function
2 import sys, os
3 import math
4 import numpy as np
5 
6 
def get_scores_distribution(scores, nbins, scorecolumn, hist_ofile):
    """Write a histogram of `scores` to a text file.

    The scores are binned with ``numpy.histogram`` and one line is written
    per bin, holding the bin's left edge and its count separated by a space.

    Args:
        scores: Values to histogram (anything ``numpy.histogram`` accepts).
        nbins: Bin specification forwarded to ``numpy.histogram`` as
            ``bins``: an integer bin count or a sequence of bin edges.
        scorecolumn: Unused; kept so existing callers keep working.
        hist_ofile: Path of the output file; it is overwritten.

    Returns:
        None.
    """
    counts, edges = np.histogram(scores, bins=nbins)
    with open(hist_ofile, 'w') as f1:
        # zip() stops at the shorter array (the counts), so the final right
        # edge is dropped and the loop no longer requires `nbins` to be an
        # integer.
        for left_edge, count in zip(edges, counts):
            print(left_edge, count, file=f1)
14 
def get_top_scorings_statistics(scores, scorecolumn, systemname):
    """Write statistics of the best score found in random subsets of `scores`.

    For each subset size (1/10, 2/10, ..., 10/10 of ``len(scores)``, rounded
    down), 150 random subsets of that size are drawn without replacement
    with ``numpy.random.choice``. The minimum of each subset is taken, and
    the mean and standard deviation of those 150 minima are written to
    ``<systemname>.Top_Score_Conv.txt`` as ``size mean std``, one line per
    subset size. The list of subset sizes is also printed to stdout.

    Args:
        scores: Sequence of scores to subsample; must be acceptable to
            ``numpy.random.choice``.
        scorecolumn: Unused; kept so existing callers keep working.
        systemname: Prefix of the output file name.

    Returns:
        None.
    """
    n_scores = len(scores)
    # Integer arithmetic yields the exact floor of each tenth (no float
    # truncation). Sizes that round down to zero are dropped because the
    # minimum of an empty subset is undefined and would raise.
    list_threshold = [
        size
        for size in (factor * n_scores // 10 for factor in range(1, 11))
        if size > 0
    ]
    with open("%s.Top_Score_Conv.txt" % systemname, 'w') as f1:
        print("Getting top scoring models at subsets of size:", list_threshold)

        for t in list_threshold:
            # Best (lowest) score seen in each of 150 random subsets of size t.
            samples = np.array([
                np.random.choice(scores, t, replace=False).min()
                for _ in range(150)
            ])
            print(t, samples.mean(), samples.std(), file=f1)
26 
def get_scores_distributions_KS_Stats(score_A, score_B, nbins, systemname):
    """Compare two score samples with a two-sample Kolmogorov-Smirnov test.

    Runs ``scipy.stats.ks_2samp`` on the two samples, writes a histogram of
    each sample via ``get_scores_distribution`` to
    ``<systemname>.Score_Hist_A.txt`` and ``<systemname>.Score_Hist_B.txt``,
    and writes the test result to ``<systemname>.KS_Test.txt`` as a single
    ``d_stat p_value`` line.

    Args:
        score_A: First sample of scores.
        score_B: Second sample of scores.
        nbins: Bin specification passed on to ``get_scores_distribution``.
        systemname: Prefix of the three output file names.

    Returns:
        Tuple ``(d_stat, p_value)`` as returned by ``ks_2samp``.
    """
    # Function-scope import: scipy is only loaded when this function runs.
    from scipy.stats import ks_2samp

    d_stat, p_value = ks_2samp(score_A, score_B)

    get_scores_distribution(score_A, nbins, 0, "%s.Score_Hist_A.txt" % systemname)
    get_scores_distribution(score_B, nbins, 0, "%s.Score_Hist_B.txt" % systemname)

    with open("%s.KS_Test.txt" % systemname, 'w') as f1:
        print(d_stat, p_value, file=f1)
    return d_stat, p_value