3 """This script creates input files for pmi/bin/run_clustering.py
# Path prefix of the per-job directories: the job loop appends the 1-based
# job index to this string to form each job's directory name.
output_prefix = "./test_output/run"
# Subunit names that make up the "head" group.
# "med17" and "med18" are also listed in `middle`.
head = (
    "med6",
    "med8",
    "med11",
    "med17",
    "med18",
    "med20",
    "med22",
)
# Subunit names that make up the "middle" group.
# It shares "med17"/"med18" with `head` and "med14" with `tail`.
middle = (
    "med1",
    "med4",
    "med7",
    "med9",
    "med10",
    "med14",
    "med17",
    "med18",
    "med19",
    "med21",
    "med31",
)
# A subset of the "middle" group: every name here also appears in `middle`.
# The order is kept exactly as written, since it determines the order in
# which the names are emitted when this tuple is used as a job's subunits.
sub_middle = (
    "med4",
    "med9",
    "med7",
    "med21",
    "med31",
)
# Subunit names that make up the "tail" group.
# "med14" is also listed in `middle`.
tail = (
    "med2",
    "med3",
    "med5",
    "med14",
    "med15",
    "med16",
)
# Union of all three groups with duplicates removed (some names occur in more
# than one group). The tuple is built from a set, so its element order is
# whatever the set iteration yields, not the order of the source tuples.
full = tuple(set(head + middle + tail))
# Feature keys used for the standard (non-EM) part of a job; combined with
# `em` via list concatenation when building `jobs`.
# NOTE(review): the extraction this file was recovered from skips one source
# line between the Linker score entry and the Sigma entry -- confirm that no
# feature key was dropped there.
std = [
    "ISDCrossLinkMS_Distance_intrarb",
    "ISDCrossLinkMS_Distance_interrb",
    "ISDCrossLinkMS_Data_Score",
    "SimplifiedModel_Linker_Score_None",
    "ISDCrossLinkMS_Sigma",
]
# Additional feature key appended to `std` (as `std + em`) when building jobs.
em = ["GaussianEMRestraint_None"]
# Job table: each entry is a [subunits, feature_keys] pair, which is how the
# loop over `jobs` unpacks it. The first job uses `sub_middle` with the
# concatenation of the `std` and `em` feature keys.
# NOTE(review): the list is not closed in the visible source; the remaining
# entries and the closing bracket are not shown here.
28 jobs = [[sub_middle,std+em],
# Extra settings; the job loop writes each entry to the analysis file as a
# "key value" line.
# NOTE(review): `nclusters` is defined outside the visible lines, and the
# closing brace of this dict is not shown here.
62 other_options={
'global_output_dir' :
'output/',
63 'number_of_best_scoring_models' : 500,
64 'distance_matrix_file' :
"distance.rawmatrix.pkl",
65 'load_distance_matrix_file' : 0,
66 'skip_clustering' : 0,
68 'exit_after_display' : 0,
70 'number_of_clusters' : nclusters,
# Main loop: for every job, emit an 'analysis.txt' file inside the job
# directory named output_prefix + <1-based job index>.
# NOTE(review): several statement bodies, the opening of `outf`, and the
# definitions of `scores` / `best_score_list` are not present in the visible
# lines; the comments below describe only what is shown.
73 for nj,(subunits,feature_keys)
in enumerate(jobs):
# Test for a missing job directory (the branch body is not visible here).
74 if not os.path.isdir(output_prefix+str(nj+1)):
# Path of the file written for this job.
76 out_fn = os.path.join(output_prefix+str(nj+1),
'analysis.txt')
# Python 2 print statement.
77 print 'writing',out_fn
# `d` is the directory part of out_fn, i.e. the job directory.
78 d = os.path.dirname(out_fn)
# Scan every entry of the job directory.
82 for dd
in glob.glob(os.path.join(output_prefix+str(nj+1),
'*')):
# Keep only directories whose path contains 'modeling' or 'jackmodel'.
83 if os.path.isdir(dd)
and (
'modeling' in dd
or 'jackmodel' in dd):
# Check that the directory holds a 'best.scores.rex.py' file
# (what is done with it is not visible here).
84 if os.path.isfile(os.path.join(dd,
'best.scores.rex.py')):
# Clustering output goes to <d>/clustering/kmeans_2_<nclusters>/.
88 output_dir =
'%s/clustering/kmeans_2_%i/'%(d,nclusters)
# Create the parent 'clustering' directory if it does not exist yet.
89 if not os.path.isdir(os.path.join(d,
'clustering')):
90 os.mkdir(os.path.join(d,
'clustering'))
# 'subunits' line: every subunit name followed by a single space.
92 outf.write(
'subunits ')
93 for subunit
in subunits:
94 outf.write(subunit+
' ')
# Start of the 'merge_directories' line (the code writing its values is not
# visible here).
96 outf.write(
'\nmerge_directories ')
# 'output_dir' line.
100 outf.write(
'\noutput_dir '+output_dir)
# 'feature_keys' line: every feature key followed by a single space.
102 outf.write(
'\nfeature_keys ')
103 for feature
in feature_keys:
104 outf.write(feature+
' ')
# One "key value" line per entry of other_options.
107 for key
in other_options:
108 outf.write(key+
" "+str(other_options[key])+
'\n')
# Read 'best.scores.rex.py' from directory `d`.
# NOTE(review): `d` may have been rebound by a loop that is not visible here;
# no close() for `inf` is visible either -- confirm the handle is released.
113 inf=open(os.path.join(d,
'best.scores.rex.py'),
'r')
# Take the first line of that file and drop every 'self.' occurrence.
114 l=inf.readlines()[0].replace('self.',
'')
# Record the last element of best_score_list.
# NOTE(review): presumably best_score_list is produced from `l` by a line
# that is not visible here -- confirm.
116 scores.append(best_score_list[-1])
# 'prefilter_value' line: the largest collected score, with two decimals.
118 outf.write(
'prefilter_value %.2f\n'%max(scores))