IMP  2.3.1
The Integrative Modeling Platform
setup_analysis.py
1 ## \example pmi/setup_analysis.py
2 
3 """This script creates input files for pmi/bin/run_clustering.py
4 """
5 import sys,os
6 import glob
7 
# Run directories are named output_prefix + str(job_number), with job
# numbers starting at 1 (e.g. ./test_output/run1, ./test_output/run2, ...).
output_prefix = "./test_output/run"
# Written to each analysis file as 'number_of_clusters' and also used to
# name the clustering output directory.
nclusters = 2

### all the possible subunit groups
head = ("med6", "med8", "med11", "med17", "med18", "med20", "med22")
middle = ("med1", "med4", "med7", "med9", "med10", "med14", "med17",
          "med18", "med19", "med21", "med31")
sub_middle = ("med4", "med9", "med7", "med21", "med31")
tail = ("med2", "med3", "med5", "med14", "med15", "med16")
# Union of head, middle and tail without duplicates.  Sorted so that the
# 'subunits' line of the generated files is reproducible: iterating a bare
# set gives an arbitrary order that can differ from one process to the next.
full = tuple(sorted(set(head + middle + tail)))

### feature keys requested for every job
std = [
    "ISDCrossLinkMS_Distance_intrarb",
    "ISDCrossLinkMS_Distance_interrb",
    "ISDCrossLinkMS_Data_Score",
    "SimplifiedModel_Linker_Score_None",
    "ISDCrossLinkMS_Psi",
    "ISDCrossLinkMS_Sigma",
]

### extra feature key added for the jobs listed with std + em
em = ["GaussianEMRestraint_None"]

# subunits for each job
# Each entry is [subunit group, feature keys].  The position in this list
# is significant: entry i (0-based) describes the run directory
# output_prefix + str(i + 1).
jobs = [
    [sub_middle, std + em],   # job 1
    [middle, std],            # job 2
    [middle, std],            # job 3
    [middle, std],            # job 4
    [middle, std],            # job 5
    [middle, std],            # job 6
    [middle, std],            # job 7
    [full, std + em],         # job 8
    [full, std + em],         # job 9
    [middle, std + em],       # job 10
    [middle, std + em],       # job 11
    [tail, std + em],         # job 12
    [tail, std + em],         # job 13
    [tail, std + em],         # job 14
    [tail, std + em],         # job 15
    [full, std + em],         # job 16
    [full, std + em],         # job 17
    [full, std + em],         # job 18
    [full, std + em],         # job 19
    [full, std + em],         # job 20
    [full, std + em],         # job 21
    [middle, std + em],       # job 22
    [middle, std + em],       # job 23
    [tail, std + em],         # job 24
    [tail, std + em],         # job 25
    [middle, std + em],       # job 26
    [middle, std + em],       # job 27
    [full, std + em],         # job 28
    [tail, std + em],         # job 29
    [tail, std + em],         # job 30
]

### info common to all jobs
# Every key/value pair below is copied into each analysis file as a
# 'key value' line.
other_options = {
    'global_output_dir': 'output/',
    'number_of_best_scoring_models': 500,
    'distance_matrix_file': "distance.rawmatrix.pkl",
    'load_distance_matrix_file': 0,
    'skip_clustering': 0,
    'display_plot': 0,
    'exit_after_display': 0,
    'get_every': 1,
    'number_of_clusters': nclusters,
    'voxel_size': 3.0,
}
72 
def _find_model_dirs(run_dir):
    """Return the entries of run_dir that should be merged for analysis.

    An entry qualifies when it is a directory, its path contains
    'modeling' or 'jackmodel', and it holds a 'best.scores.rex.py' file.
    """
    return [path for path in glob.glob(os.path.join(run_dir, '*'))
            if os.path.isdir(path)
            and ('modeling' in path or 'jackmodel' in path)
            and os.path.isfile(os.path.join(path, 'best.scores.rex.py'))]


def _read_last_best_score(score_file):
    """Return the last element of the best_score_list stored in score_file.

    The first line of the file is expected to be a Python assignment to
    'self.best_score_list'; the 'self.' prefix is stripped and the line is
    executed.  Raises KeyError if that line does not define
    best_score_list.
    """
    with open(score_file, 'r') as handle:
        statement = handle.readline().replace('self.', '')
    # NOTE(review): exec runs whatever code the score file contains; only
    # point this script at output you trust.
    # A private namespace is used so that a file lacking best_score_list
    # fails loudly instead of silently reusing the list left behind by the
    # previously read file.
    namespace = {}
    exec(statement, namespace)
    return namespace['best_score_list'][-1]


# Write one analysis.txt per existing run directory.
for nj, (subunits, feature_keys) in enumerate(jobs):
    run_dir = output_prefix + str(nj + 1)
    if not os.path.isdir(run_dir):
        continue

    # Decide whether there is anything to analyse *before* touching the
    # output file, so that skipped runs neither leak an open handle nor get
    # an empty analysis.txt.
    model_dirs = _find_model_dirs(run_dir)
    if not model_dirs:
        continue

    # lastly, get prefilter value
    # (computed up front so that an unreadable score file cannot leave a
    # half-written analysis.txt behind)
    prefilter_value = max(_read_last_best_score(
        os.path.join(model_dir, 'best.scores.rex.py'))
        for model_dir in model_dirs)

    clustering_dir = os.path.join(run_dir, 'clustering')
    if not os.path.isdir(clustering_dir):
        os.mkdir(clustering_dir)
    output_dir = '%s/clustering/kmeans_2_%i/' % (run_dir, nclusters)

    out_fn = os.path.join(run_dir, 'analysis.txt')
    print('writing %s' % out_fn)

    with open(out_fn, 'w') as outf:
        # Every list item is followed by a single space, including the last.
        outf.write('subunits ' + ''.join(name + ' ' for name in subunits))
        outf.write('\nmerge_directories '
                   + ''.join(model_dir + ' ' for model_dir in model_dirs))
        outf.write('\noutput_dir ' + output_dir)
        outf.write('\nfeature_keys '
                   + ''.join(feature + ' ' for feature in feature_keys))
        outf.write('\n')

        for key in other_options:
            outf.write(key + " " + str(other_options[key]) + '\n')

        outf.write('prefilter_value %.2f\n' % prefilter_value)