5 """This script creates input files for pmi/bin/run_clustering.py
# Common prefix of the per-job run directories: the 1-based job number is
# appended to this string to form each job's directory name.
output_prefix = "./test_output/run"
# Named groups of subunit identifiers.  A name may appear in more than one
# group ("med17" and "med18" are in both head and middle, "med14" is in both
# middle and tail).
head = ("med6", "med8", "med11", "med17", "med18", "med20", "med22")
middle = (
    "med1", "med4", "med7", "med9", "med10", "med14",
    "med17", "med18", "med19", "med21", "med31",
)
sub_middle = ("med4", "med9", "med7", "med21", "med31")
tail = ("med2", "med3", "med5", "med14", "med15", "med16")

# Every subunit of head, middle and tail exactly once.  Duplicates are
# dropped while keeping first-occurrence order, so the tuple is identical on
# every run; going through set() alone leaves the order up to string hashing,
# which is arbitrary and not reproducible when hash randomisation is active.
full = []
for _name in head + middle + tail:
    if _name not in full:
        full.append(_name)
full = tuple(full)
# Feature keys.  The job list combines the two lists as std + em, and the
# selected keys are written after the 'feature_keys' keyword of the
# generated analysis file.
std = [
    "ISDCrossLinkMS_Distance_intrarb",
    "ISDCrossLinkMS_Distance_interrb",
    "ISDCrossLinkMS_Data_Score",
    "SimplifiedModel_Linker_Score_None",
    "ISDCrossLinkMS_Sigma",
]
em = ["GaussianEMRestraint_None"]
# List of jobs.  Each entry pairs a subunit group with a list of feature
# keys; the loop over enumerate(jobs) unpacks an entry as
# (subunits, feature_keys).
# NOTE(review): the remaining entries and the closing bracket of this list
# are not among the visible lines -- confirm against the full file.
30 jobs = [[sub_middle,std+em],
# Additional settings for the generated analysis file; the job loop writes
# each entry as one "key value" line.
# NOTE(review): `nclusters` is assigned outside the visible lines, and the
# closing brace of this dict is not visible either -- confirm against the
# full file.
64 other_options={
'global_output_dir' :
'output/',
65 'number_of_best_scoring_models' : 500,
66 'distance_matrix_file' :
"distance.rawmatrix.pkl",
67 'load_distance_matrix_file' : 0,
68 'skip_clustering' : 0,
70 'exit_after_display' : 0,
72 'number_of_clusters' : nclusters,
# Produce one analysis input file per job.  Job directories are numbered
# from 1: output_prefix + str(nj+1).
# NOTE(review): several statements of this loop are not among the visible
# lines (the bodies of some `if`/`for` statements, and the places where
# `outf`, `scores` and `best_score_list` are created) -- confirm against the
# full file before relying on these notes.
75 for nj,(subunits,feature_keys)
in enumerate(jobs):
76 if not os.path.isdir(output_prefix+str(nj+1)):
# The file written for this job is <job directory>/analysis.txt, and `d` is
# that job directory.  The print below is a Python 2 print statement.
78 out_fn = os.path.join(output_prefix+str(nj+1),
'analysis.txt')
79 print 'writing',out_fn
80 d = os.path.dirname(out_fn)
# Scan the entries of the job directory for sub-directories whose path
# contains 'modeling' or 'jackmodel' and that hold a 'best.scores.rex.py'
# file.
84 for dd
in glob.glob(os.path.join(output_prefix+str(nj+1),
'*')):
85 if os.path.isdir(dd)
and (
'modeling' in dd
or 'jackmodel' in dd):
86 if os.path.isfile(os.path.join(dd,
'best.scores.rex.py')):
# Clustering output path for this job; the parent 'clustering' directory is
# created when it does not exist yet.
90 output_dir =
'%s/clustering/kmeans_2_%i/'%(d,nclusters)
91 if not os.path.isdir(os.path.join(d,
'clustering')):
92 os.mkdir(os.path.join(d,
'clustering'))
# 'subunits' keyword followed by the space-separated subunit names.
94 outf.write(
'subunits ')
95 for subunit
in subunits:
96 outf.write(subunit+
' ')
# NOTE(review): the values that follow 'merge_directories ' are written by
# lines that are not visible here.
98 outf.write(
'\nmerge_directories ')
102 outf.write(
'\noutput_dir '+output_dir)
# 'feature_keys' keyword followed by the space-separated feature keys.
104 outf.write(
'\nfeature_keys ')
105 for feature
in feature_keys:
106 outf.write(feature+
' ')
# One "key value" line per entry of other_options.
109 for key
in other_options:
110 outf.write(key+
" "+str(other_options[key])+
'\n')
# Take the first line of a 'best.scores.rex.py' file with every 'self.'
# removed, and collect the last element of best_score_list.
# NOTE(review): the visible lines show neither `inf` being closed nor how
# `l` produces best_score_list -- confirm against the full file.
115 inf=open(os.path.join(d,
'best.scores.rex.py'),
'r')
116 l=inf.readlines()[0].replace('self.',
'')
118 scores.append(best_score_list[-1])
# The largest collected score is written as prefilter_value with two
# decimals.
120 outf.write(
'prefilter_value %.2f\n'%max(scores))