IMP  2.4.0
The Integrative Modeling Platform
pmi/setup_analysis.py
1 ## \example pmi/setup_analysis.py
2 
3 """This script creates input files for pmi/bin/run_clustering.py
4 """
5 from __future__ import print_function
6 import sys,os
7 import glob
8 
# Number of clusters; written out as the 'number_of_clusters' option and
# used in the name of the clustering output directory.
nclusters = 2

# Path prefix of the per-job directories: the directory for job N
# (counting from 1) is output_prefix + str(N).
output_prefix = "./test_output/run"
11 
### all the possible subunit groups
head = ("med6", "med8", "med11", "med17", "med18", "med20", "med22")
middle = ("med1", "med4", "med7", "med9", "med10", "med14", "med17",
          "med18", "med19", "med21", "med31")
sub_middle = ("med4", "med9", "med7", "med21", "med31")
tail = ("med2", "med3", "med5", "med14", "med15", "med16")
# Union of head, middle and tail with duplicates removed.  The set is sorted
# so the tuple (and therefore the 'subunits' line written for jobs that use
# it) has the same order on every run; iterating a bare set of strings can
# yield a different order from one interpreter invocation to the next.
full = tuple(sorted(set(head + middle + tail)))
18 
# Feature key added (as std + em) for the jobs that list it.
em = ["GaussianEMRestraint_None"]

# Feature keys listed for every job.
std = [
    "ISDCrossLinkMS_Distance_intrarb",
    "ISDCrossLinkMS_Distance_interrb",
    "ISDCrossLinkMS_Data_Score",
    "SimplifiedModel_Linker_Score_None",
    "ISDCrossLinkMS_Psi",
    "ISDCrossLinkMS_Sigma",
]
27 
# subunits for each job
# jobs[i] is [subunit group, feature keys] for job number i + 1.  The list is
# expanded from (group, include EM key?, number of consecutive jobs) runs.
jobs = list(
    [group, (std + em) if with_em else std]
    for group, with_em, repeat in (
        (sub_middle, True, 1),   # job 1
        (middle, False, 6),      # jobs 2-7
        (full, True, 2),         # jobs 8-9
        (middle, True, 2),       # jobs 10-11
        (tail, True, 4),         # jobs 12-15
        (full, True, 6),         # jobs 16-21
        (middle, True, 2),       # jobs 22-23
        (tail, True, 2),         # jobs 24-25
        (middle, True, 2),       # jobs 26-27
        (full, True, 1),         # job 28
        (tail, True, 2),         # jobs 29-30
    )
    for _ in range(repeat)
)
59 
### info common to all jobs

# Each entry is written to every analysis file as one "key value" line.
other_options = dict(
    global_output_dir='output/',
    number_of_best_scoring_models=500,
    distance_matrix_file="distance.rawmatrix.pkl",
    load_distance_matrix_file=0,
    skip_clustering=0,
    display_plot=0,
    exit_after_display=0,
    get_every=1,
    number_of_clusters=nclusters,
    voxel_size=3.0,
)
73 
# For every job whose directory exists, write <job dir>/analysis.txt listing
# the job's subunits, the run directories to merge, the clustering output
# directory, the feature keys, the common options and a prefilter value.
for nj, (subunits, feature_keys) in enumerate(jobs):
    # Job directories are numbered from 1.
    job_dir = output_prefix + str(nj + 1)
    if not os.path.isdir(job_dir):
        continue
    out_fn = os.path.join(job_dir, 'analysis.txt')
    print('writing', out_fn)

    # 'with' guarantees the file is closed on every path, including the
    # early 'continue' below (which leaves an empty analysis.txt behind,
    # as before) and any exception raised while reading score files.
    with open(out_fn, 'w') as outf:
        # Keep only sub-directories whose path contains 'modeling' or
        # 'jackmodel' and that hold a best.scores.rex.py file.
        dirs = [
            dd for dd in glob.glob(os.path.join(job_dir, '*'))
            if os.path.isdir(dd)
            and ('modeling' in dd or 'jackmodel' in dd)
            and os.path.isfile(os.path.join(dd, 'best.scores.rex.py'))
        ]
        if not dirs:
            continue

        output_dir = '%s/clustering/kmeans_2_%i/' % (job_dir, nclusters)
        clustering_dir = os.path.join(job_dir, 'clustering')
        if not os.path.isdir(clustering_dir):
            os.mkdir(clustering_dir)

        # Every list item is followed by a single space (so each list line
        # ends with a trailing space), matching the established file layout.
        outf.write('subunits ')
        outf.write(''.join(subunit + ' ' for subunit in subunits))

        outf.write('\nmerge_directories ')
        outf.write(''.join(md + ' ' for md in dirs))

        outf.write('\noutput_dir ' + output_dir)

        outf.write('\nfeature_keys ')
        outf.write(''.join(feature + ' ' for feature in feature_keys))
        outf.write('\n')

        for key, value in other_options.items():
            outf.write(key + " " + str(value) + '\n')

        # lastly, get prefilter value: the largest of the last entries of
        # each directory's best_score_list.
        scores = []
        for score_dir in dirs:
            score_fn = os.path.join(score_dir, 'best.scores.rex.py')
            with open(score_fn, 'r') as inf:
                # Only the first line is used; it is expected to assign
                # 'self.best_score_list' (TODO confirm against the writer
                # of best.scores.rex.py).
                first_line = inf.readline().replace('self.', '')
            # NOTE(security): exec runs arbitrary code taken from the file;
            # only use this script on output directories you trust.
            # A fresh namespace per file keeps a stale best_score_list from
            # an earlier directory from being reused if this line does not
            # define one (a KeyError is raised instead).
            namespace = {}
            exec(first_line, namespace)
            scores.append(namespace['best_score_list'][-1])
        outf.write('prefilter_value %.2f\n' % max(scores))