IMP 2.3.0
The Integrative Modeling Platform
setup_analysis.py
## \example pmi/setup_analysis.py

#!/usr/bin/env python

"""This script creates input files for pmi/bin/run_clustering.py
"""
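# Illustrative only: the analysis.txt written below is a plain key/value file
# consumed by run_clustering.py, roughly of the form shown here (the paths and
# the score value are hypothetical):
#
#   subunits med4 med9 med7 med21 med31
#   merge_directories ./test_output/run1/modeling1 ./test_output/run1/modeling2
#   output_dir ./test_output/run1/clustering/kmeans_2_2/
#   feature_keys ISDCrossLinkMS_Distance_intrarb ... GaussianEMRestraint_None
#   global_output_dir output/
#   number_of_best_scoring_models 500
#   ...                                 (remaining other_options keys)
#   prefilter_value -12345.67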
import sys,os
import glob

output_prefix="./test_output/run"
nclusters = 2

### all the possible subunit groups
head = ("med6","med8","med11","med17","med18","med20","med22")
middle = ("med1","med4","med7","med9","med10","med14","med17","med18","med19","med21","med31")
sub_middle = ("med4","med9","med7","med21","med31")
tail = ("med2","med3","med5","med14","med15","med16")
full = tuple(set(head+middle+tail))

std = ["ISDCrossLinkMS_Distance_intrarb",
       "ISDCrossLinkMS_Distance_interrb",
       "ISDCrossLinkMS_Data_Score",
       "SimplifiedModel_Linker_Score_None",
       "ISDCrossLinkMS_Psi",
       "ISDCrossLinkMS_Sigma"]

em = ["GaussianEMRestraint_None"]

# subunits for each job
jobs = [[sub_middle,std+em],
        [middle,std],
        [middle,std],
        [middle,std],
        [middle,std],
        [middle,std],
        [middle,std],
        [full,std+em],
        [full,std+em],
        [middle,std+em],
        [middle,std+em],
        [tail,std+em],
        [tail,std+em],
        [tail,std+em],
        [tail,std+em],
        [full,std+em],
        [full,std+em],
        [full,std+em],
        [full,std+em],
        [full,std+em],
        [full,std+em],
        [middle,std+em],
        [middle,std+em],
        [tail,std+em],
        [tail,std+em],
        [middle,std+em],
        [middle,std+em],
        [full,std+em],
        [tail,std+em],
        [tail,std+em]]
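# Each entry of jobs above pairs a subunit selection with the feature keys to
# report; entry nj is matched to the directory output_prefix+str(nj+1) below.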

### info common to all jobs


other_options={'global_output_dir' : 'output/',
               'number_of_best_scoring_models' : 500,
               'distance_matrix_file' : "distance.rawmatrix.pkl",
               'load_distance_matrix_file' : 0,
               'skip_clustering' : 0,
               'display_plot' : 0,
               'exit_after_display' : 0,
               'get_every' : 1,
               'number_of_clusters' : nclusters,
               'voxel_size' : 3.0}

for nj,(subunits,feature_keys) in enumerate(jobs):
    if not os.path.isdir(output_prefix+str(nj+1)):
        continue
    out_fn = os.path.join(output_prefix+str(nj+1),'analysis.txt')
    print('writing '+out_fn)
    d = os.path.dirname(out_fn)
    outf=open(out_fn,'w')
    dirs=[]

    # keep only sampling directories that contain a best.scores.rex.py file
    for dd in glob.glob(os.path.join(output_prefix+str(nj+1),'*')):
        if os.path.isdir(dd) and ('modeling' in dd or 'jackmodel' in dd):
            if os.path.isfile(os.path.join(dd,'best.scores.rex.py')):
                dirs.append(dd)
    if len(dirs)==0:
        continue
    output_dir = '%s/clustering/kmeans_2_%i/'%(d,nclusters)
    if not os.path.isdir(os.path.join(d,'clustering')):
        os.mkdir(os.path.join(d,'clustering'))

    outf.write('subunits ')
    for subunit in subunits:
        outf.write(subunit+' ')

    outf.write('\nmerge_directories ')
    for md in dirs:
        outf.write(md+' ')

    outf.write('\noutput_dir '+output_dir)

    outf.write('\nfeature_keys ')
    for feature in feature_keys:
        outf.write(feature+' ')
    outf.write('\n')

    for key in other_options:
        outf.write(key+" "+str(other_options[key])+'\n')

    # lastly, get prefilter value
    scores=[]
    for rundir in dirs:
        inf=open(os.path.join(rundir,'best.scores.rex.py'),'r')
        l=inf.readlines()[0].replace('self.','')
        exec(l)   # defines best_score_list
        scores.append(best_score_list[-1])
        inf.close()
    outf.write('prefilter_value %.2f\n'%max(scores))

    outf.close()
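
The prefilter value is pulled from each run's best.scores.rex.py, whose first
line is expected to look like self.best_score_list=[...]; the script strips
the 'self.' prefix and exec()s the remainder. If that one-line format holds,
the same value can also be read without exec, for example with the
hypothetical helper sketched below (ast.literal_eval parses the list literal
safely):

import ast

def read_last_best_score(path):
    # Assumes the first line of best.scores.rex.py has the form
    # "self.best_score_list=[ ... ]", as the exec() call above relies on.
    with open(path) as fh:
        line = fh.readline().replace('self.', '')
    _, _, literal = line.partition('=')
    return ast.literal_eval(literal.strip())[-1]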