IMP Reference Guide 2.14.0
The Integrative Modeling Platform
select_good.py
#!/usr/bin/env python
from __future__ import print_function
import IMP
import os
import sys
from IMP import ArgumentParser


__doc__ = "List/extract good-scoring models from a set of sampling runs."

def parse_args():
    parser = ArgumentParser(
        description="List and extract good-scoring models from a set of "
        "sampling runs. Example of usage: %(prog)s "
        "-rd <run_directory_for_sampling> -rp <run_prefix> "
        "-sl ExcludedVolumeSphere_None GaussianEMRestraint_None "
        "-pl CrossLinkingMassSpectrometryDataScore|XLDSS "
        "CrossLinkingMassSpectrometryDataScore|XLEDC "
        "-alt -9999999.0 -99999.0 -aut 99999999.0 999999.0 "
        "-mlt 0 0 -mut 0 0. Flag -h for more details.")

    parser.add_argument("-rd", "--run_directory", dest="run_dir",
                        help="directory in which sampling results are stored",
                        required=True)
    parser.add_argument("-rp", "--run_prefix", dest="run_prefix",
                        help="prefix of runs", required=True)
    parser.add_argument("-sl", "--selection_keywords_list", nargs='+', type=str,
                        dest="selection_keywords_list",
                        help="list of stat file keywords corresponding to selection criteria")
    parser.add_argument("-pl", "--printing_keywords_list", nargs='+', type=str,
                        dest="printing_keywords_list",
                        help="list of stat file keywords whose values are printed out for selected models")

    # Thresholds only apply to selection keywords
    parser.add_argument("-alt", "--aggregate_lower_thresholds", nargs='+', type=float,
                        dest="aggregate_lower_thresholds", help="aggregate lower thresholds")
    parser.add_argument("-aut", "--aggregate_upper_thresholds", nargs='+', type=float,
                        dest="aggregate_upper_thresholds", help="aggregate upper thresholds")
    parser.add_argument("-mlt", "--member_lower_thresholds", nargs='+', type=float,
                        dest="member_lower_thresholds", help="member lower thresholds")
    parser.add_argument("-mut", "--member_upper_thresholds", nargs='+', type=float,
                        dest="member_upper_thresholds", help="member upper thresholds")

    parser.add_argument("-e", "--extract", default=False, dest="extract", action='store_true',
                        help="Type -e to extract all good scoring model RMFs from the trajectory files")
    parser.add_argument("-sf", "--score_file", default="scores", type=str, dest="score_file_prefix",
                        help="Score file prefix for samples A and B. Default is %(default)r")
    result = parser.parse_args()

    return result

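# Example invocation (a hedged sketch, not taken from the IMP documentation;
# the run directory, run prefix, stat-file keywords and thresholds below are
# placeholders that depend on your own sampling setup and scoring function):
#
#   python select_good.py -rd modeling_runs -rp run \
#       -sl Total_Score -pl Total_Score \
#       -alt 0.0 -aut 100.0 -mlt 0.0 -mut 100.0 -e
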
def select_good_scoring_models():
    from IMP.sampcon.good_scoring_model_selector import GoodScoringModelSelector

    # Process input
    arg = parse_args()

    gsms = GoodScoringModelSelector(arg.run_dir, arg.run_prefix)

    subsets = gsms.get_good_scoring_models(
        selection_keywords_list=arg.selection_keywords_list,
        printing_keywords_list=arg.printing_keywords_list,
        aggregate_lower_thresholds=arg.aggregate_lower_thresholds,
        aggregate_upper_thresholds=arg.aggregate_upper_thresholds,
        member_lower_thresholds=arg.member_lower_thresholds,
        member_upper_thresholds=arg.member_upper_thresholds,
        extract=arg.extract)
    return subsets, arg.extract

def create_score_files(subsets, field="Total_Score"):
    arg = parse_args()

    score_dir = os.path.join(arg.run_dir, "good_scoring_models")
    scoreA = open(os.path.join(score_dir, arg.score_file_prefix + "A.txt"), "w")
    scoreB = open(os.path.join(score_dir, arg.score_file_prefix + "B.txt"), "w")
    model_file = open(os.path.join(score_dir, "model_ids_scores.txt"), "r")

    print("Creating input files for Total_Score convergence test")

    for line_index, each_model_line in enumerate(model_file.readlines()):

        if line_index == 0:
            # Header line: find the index of the field we want to use for
            # model score convergence
            field_headers = each_model_line.strip().split()

            try:
                ts_ix = field_headers.index(field)
                run_ix = field_headers.index("Run_id")
                model_ix = field_headers.index("Model_index")
            except ValueError:
                print("create_score_files: model_ids_scores.txt file has an incorrect format.")
                sys.exit(1)

        else:
            # Data line: write the score to sample A or sample B, depending
            # on which subset this model belongs to
            fields = each_model_line.strip().split()

            score = fields[ts_ix]
            model = int(fields[model_ix])
            print(score, file=scoreA if model in subsets[0] else scoreB)

    model_file.close()
    scoreA.close()
    scoreB.close()

def main():
    subsets, extract = select_good_scoring_models()

    print("Filtered model scores ready")

    if extract:

        # Create Score Files
        create_score_files(subsets)

        print("Ready to calculate sampling precision with Master_Sampling_Exhaustiveness_Analysis.py")


if __name__ == "__main__":
    main()
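For reference, the same selection can also be driven programmatically rather than through the command-line flags above. The sketch below mirrors what select_good_scoring_models() does, but with hard-coded arguments; the run directory, run prefix, keyword names and thresholds are placeholders and must match the stat file fields produced by your own sampling runs.

from IMP.sampcon.good_scoring_model_selector import GoodScoringModelSelector

# Placeholder run directory and run prefix; substitute your own values
gsms = GoodScoringModelSelector("modeling_runs", "run")

# Keep models whose total score lies within placeholder thresholds and
# extract the corresponding RMF frames (extract=True), as select_good.py
# does when the -e flag is given
subsets = gsms.get_good_scoring_models(
    selection_keywords_list=["Total_Score"],
    printing_keywords_list=["Total_Score"],
    aggregate_lower_thresholds=[-1e9],
    aggregate_upper_thresholds=[1e9],
    member_lower_thresholds=[0.0],
    member_upper_thresholds=[100.0],
    extract=True)

# As in create_score_files() above, membership in subsets[0] splits the
# good-scoring models into the two samples (A and B) for the convergence test
print(subsets)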