IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
select_good.py
1 #!/usr/bin/env python
2 from __future__ import print_function
3 import os
4 from IMP import ArgumentParser
5 
6 
# One-line description of this script.
__doc__ = "List/extract good-scoring models from a set of sampling runs."
8 
9 
def parse_args():
    """Parse the command-line options of the script.

    Reads the arguments from ``sys.argv`` (via ``parser.parse_args()``).

    Returns:
        The parsed namespace, with the attributes ``run_dir``,
        ``run_prefix``, ``selection_keywords_list``,
        ``printing_keywords_list``, ``aggregate_lower_thresholds``,
        ``aggregate_upper_thresholds``, ``member_lower_thresholds``,
        ``member_upper_thresholds``, ``extract`` and ``score_file_prefix``.
        List-valued options that are not given are ``None``.
    """
    # NOTE: the usage example must use the flags registered below
    # (-alt / -aut), otherwise copying it fails with "unrecognized arguments".
    parser = ArgumentParser(
        description="List and extract good-scoring models from a set of "
                    "sampling runs. Example of usage: "
                    "%(prog)s -rd <run_directory_for_sampling> "
                    "-rp <run_prefix> -sl "
                    "ExcludedVolumeSphere_None GaussianEMRestraint_None "
                    "-pl CrossLinkingMassSpectrometryDataScore|"
                    "XLDSS CrossLinkingMassSpectrometryDataScore|XLEDC "
                    "-alt -9999999.0 "
                    "-99999.0 -aut 99999999.0 999999.0 -mlt 0 0 "
                    "-mut 0 0. Flag -h "
                    "for more details.")

    parser.add_argument("-rd", "--run_directory", dest="run_dir",
                        help="directory in which sampling results are stored",
                        required=True)

    parser.add_argument("-rp", "--run_prefix", dest="run_prefix",
                        help="prefix of runs", required=True)

    parser.add_argument("-sl", "--selection_keywords_list", nargs='+',
                        type=str, dest="selection_keywords_list",
                        help="list of stat file keywords corresponding to "
                             "selection criteria")
    parser.add_argument("-pl", "--printing_keywords_list", nargs='+',
                        type=str, dest="printing_keywords_list",
                        help="list of stat file keywords whose values are "
                             "printed out for selected models")

    # thresholds only apply to selection keywords
    parser.add_argument("-alt", "--aggregate_lower_thresholds", nargs='+',
                        type=float, dest="aggregate_lower_thresholds",
                        help="aggregate lower thresholds")
    parser.add_argument("-aut", "--aggregate_upper_thresholds", nargs='+',
                        type=float, dest="aggregate_upper_thresholds",
                        help="aggregate upper thresholds")
    parser.add_argument("-mlt", "--member_lower_thresholds", nargs='+',
                        type=float, dest="member_lower_thresholds",
                        help="member lower thresholds")
    parser.add_argument("-mut", "--member_upper_thresholds", nargs='+',
                        type=float, dest="member_upper_thresholds",
                        help="member upper thresholds")

    parser.add_argument("-e", "--extract", default=False, dest="extract",
                        action='store_true',
                        help="Type -e to extract all good scoring model "
                             "RMFs from the trajectory files")
    parser.add_argument("-sf", "--score_file", default="scores", type=str,
                        dest="score_file_prefix",
                        help="Score file prefix for samples A and B. "
                             "Default is %(default)r")

    return parser.parse_args()
62 
63 
def select_good_scoring_models():
    """Select good-scoring models according to the command-line options.

    Builds a ``GoodScoringModelSelector`` for the run directory and run
    prefix given on the command line and forwards the keyword lists,
    thresholds and the extract flag to its ``get_good_scoring_models``.

    Returns:
        A tuple ``(subsets, extract)``: the value returned by
        ``get_good_scoring_models`` and the parsed ``-e`` flag.
    """
    # Imported inside the function so the module loads without it.
    from IMP.sampcon.good_scoring_model_selector import (
        GoodScoringModelSelector)

    # process input
    options = parse_args()

    selector = GoodScoringModelSelector(options.run_dir, options.run_prefix)

    selection_kwargs = dict(
        selection_keywords_list=options.selection_keywords_list,
        printing_keywords_list=options.printing_keywords_list,
        aggregate_lower_thresholds=options.aggregate_lower_thresholds,
        aggregate_upper_thresholds=options.aggregate_upper_thresholds,
        member_lower_thresholds=options.member_lower_thresholds,
        member_upper_thresholds=options.member_upper_thresholds,
        extract=options.extract,
    )
    good_subsets = selector.get_good_scoring_models(**selection_kwargs)
    return good_subsets, options.extract
82 
83 
def create_score_files(subsets, field="Total_Score"):
    """Write the two per-sample score files from ``model_ids_scores.txt``.

    The run directory (``-rd``) and score file prefix (``-sf``) are taken
    from the command line. ``model_ids_scores.txt`` is read from the
    ``good_scoring_models`` subdirectory of the run directory; its first
    line is a whitespace-separated header that must contain `field`,
    ``Run_id`` and ``Model_index``. For every following non-blank line the
    value in column `field` is written to ``<prefix>A.txt`` when the line's
    ``Model_index`` is in ``subsets[0]``, and to ``<prefix>B.txt`` otherwise.

    Args:
        subsets: indexable whose first element holds the model indices of
            sample A (assumed to be hashable integers - confirm against
            the selector that produces it).
        field: header name of the column to write. Defaults to
            ``"Total_Score"``.

    Raises:
        SystemExit: with status 1 if the header lacks a required column.
    """
    arg = parse_args()

    score_dir = os.path.join(arg.run_dir, "good_scoring_models")
    path_a = os.path.join(score_dir, arg.score_file_prefix + "A.txt")
    path_b = os.path.join(score_dir, arg.score_file_prefix + "B.txt")
    model_path = os.path.join(score_dir, "model_ids_scores.txt")

    # Build the lookup once so each per-line membership test is O(1)
    # instead of a scan over the whole subset.
    sample_a = frozenset(subsets[0])

    # The context managers close all three files on every exit path,
    # including the error exit below.
    with open(path_a, "w") as score_a, \
            open(path_b, "w") as score_b, \
            open(model_path, "r") as model_file:

        print("Creating input files for %s convergence test" % field)

        header = next(model_file, None)
        if header is None:
            # Empty model file: nothing to write.
            return

        # Find index of the field we want to use for model score convergence
        field_headers = header.split()
        try:
            ts_ix = field_headers.index(field)
            # Looked up only to check that the column is present.
            field_headers.index("Run_id")
            model_ix = field_headers.index("Model_index")
        except ValueError:
            print("create_scores_file: model_ids_scores.txt file has "
                  "an incorrect format.")
            # Non-zero status so callers can detect the failure.
            raise SystemExit(1)

        for each_model_line in model_file:
            fields = each_model_line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline).
                continue

            score = fields[ts_ix]
            model = int(fields[model_ix])
            print(score, file=score_a if model in sample_a else score_b)
123 
124 
def main():
    """Select good-scoring models and, with ``-e``, write the score files.

    The score files are only created when the extract flag returned by
    ``select_good_scoring_models`` is set.
    """
    subsets, extract = select_good_scoring_models()
    print("Filtered model scores ready")

    if not extract:
        return

    # Create Score Files
    create_score_files(subsets)
    print("Ready to calculate sampling precision with "
          "`imp_sampcon exhaust`")


if __name__ == "__main__":
    main()