1 """@namespace IMP.spatiotemporal.prepare_protein_library
2 Function for preparing spatiotemporal models for sampling.
11 nmodels, output_dir=
'', template_topology=
'',
12 template_dict={}, match_final_state=
True):
14 Function that reads in experimental stoichiometry data and calculates
15 which compositions and location assignments should be sampled for
16 spatiotemporal modeling, which are saved as config files. Optionally,
17 a PMI topology file can be provided, in which case topology files
18 for each composition and location assignment are also written.
19 The output is 3 types of files:
20 1. *_time.config - configuration files, which list the proteins
21 included at each time point for each model
22 2. time.txt - protein copy number files. Each row is a protein
23 copy number state and each column is the
24 protein copy number in that state. Note that each
25 protein copy number state can result in multiple
27 3. *_time_topol.txt - topology files for each copy number
28 and location assignment.
30 @param times: list of strings, the times at which the stoichiometry
32 @param exp_comp_map: dictionary, which describes protein
34 The key describes the protein, which should correspond to names
35 within the expected_subcomplexes. Only copy numbers for proteins
36 or subcomplexes included in this dictionary will be scored. For
37 each of these proteins, a csv file should be provided with protein
38 copy number data. The csv file should have 3 columns,
39 1) "Time", which matches up to the possible times in the graph,
40 2) "mean", the average protein copy number at that time point
41 from experiment, and 3) "std", the standard deviation of that
42 protein copy number from experiment.
43 @param expected_subcomplexes: list of all possible subcomplex strings
44 in the model. Should be a list without duplicates of
45 all components in the subcomplex configuration files.
46 @param nmodels: int, number of models with different protein copy
47 numbers to generate at each time point.
48 @param output_dir: string, directory where the output will be written.
49 Empty string assumes the current working directory.
50 @param template_topology: string, name of the topology file
51 for the complete complex.
52 (default: '', no topology files are output)
53 @param template_dict: dictionary for connecting the spatiotemporal
54 model to the topology file. The keys (string) are the names of
55 the proteins, defined by the expected_subcomplexes variable.
56 The values (list) are the names of all proteins in the topology
57 file that should have the same copy number
58 as the labeled protein, specifically the "molecule_name."
59 (default: {}, no topology files are output)
60 @param match_final_state: Boolean, determines whether to fix the
61 final state to the state defined by expected_subcomplexes.
62 True enforces this match and thus ensures that the final
63 time has only one state.
67 if not isinstance(times, list):
68 raise TypeError(
"times should be of type list")
69 if not isinstance(exp_comp_map, dict):
70 raise TypeError(
"exp_comp_map should be of type dict")
71 if not isinstance(expected_subcomplexes, list):
72 raise TypeError(
"expected_subcomplexes should be of type list")
73 if not isinstance(nmodels, int):
74 raise TypeError(
"nmodels should be of type int")
75 if not isinstance(output_dir, str):
76 raise TypeError(
"output_dir should be of type str")
77 if not isinstance(template_topology, str):
78 raise TypeError(
"template_topology should be of type str")
79 if not isinstance(template_dict, dict):
80 raise TypeError(
"template_dict should be of type dict")
81 if not isinstance(match_final_state, bool):
82 raise TypeError(
"match_final_state should be of type bool")
84 if len(output_dir) > 0:
85 if os.path.exists(output_dir):
91 include_topology =
False
93 final_CN = np.zeros(len(exp_comp_map.keys()), dtype=int)
94 for i, key
in enumerate(exp_comp_map.keys()):
95 for subcomplex
in expected_subcomplexes:
102 copy_num.append(range(CN+1))
105 all_library = [iterable
for iterable
in itertools.product(*copy_num)]
108 for i
in range(len(final_CN)):
109 empty_state.append(0)
110 all_library.pop(all_library.index(tuple(empty_state)))
117 if time == times[len(times)-1]
and match_final_state:
118 olist = [list(final_CN)]
121 state_list.append(expected_subcomplexes)
126 unnormalized_weights = []
127 for state
in all_library:
128 unnormalized_weights.append(
129 composition_scoring.calc_likelihood_state(
130 exp_comp_map, time, state))
131 unw = np.array(unnormalized_weights)
133 mindx = np.argsort(unw)[0:nmodels]
138 state = all_library[m]
140 olist.append(list(state))
145 for i, cn
in enumerate(state):
148 found_subcomplex = []
149 for subcomplex
in expected_subcomplexes:
151 if list(exp_comp_map.keys())[i]
in subcomplex:
152 found_subcomplex.append(subcomplex)
154 prots = list(itertools.combinations(found_subcomplex,
157 sub_list.append(prot)
158 if len(sub_list[0]) > 0:
159 cn_list.append(sub_list)
161 all_cn = [iterable
for iterable
in itertools.product(*cn_list)]
173 cn_list2.append(prot)
174 state_list.append(cn_list2)
176 oary = np.array(olist, dtype=int)
178 for prot_name
in exp_comp_map.keys():
179 header = header+str(prot_name)+
'\t\t\t\t'
180 np.savetxt(time +
".txt", oary, header=header)
183 for indx, prot_list
in enumerate(state_list):
184 with open(str(indx + 1) +
"_" + time +
".config",
"w")
as fh:
185 for prot
in prot_list:
186 fh.write(prot +
"\n")
188 if len(template_topology) > 0:
189 include_topology =
True
191 for indx, prot_list
in enumerate(state_list):
196 for prot
in prot_list:
197 if prot
in template_dict.keys():
198 keep_prots.extend(template_dict[prot])
200 raise KeyError(
"Protein " + prot
201 +
' does not exist in template_dict'
204 with open(str(indx + 1) +
"_" + time +
205 "_topol.txt",
"w")
as fh:
206 old = open(template_topology,
'r')
207 line = old.readline()
209 line_split = line.split(
'|')
211 if len(line_split) < 2:
216 if line_split[1]
in keep_prots:
219 elif line_split[1] ==
'molecule_name ':
221 line = old.readline()
225 print(
'Successfully calculated the most likely configurations,'
226 ' and saved them to configuration and topology '
229 print(
'Successfully calculated the most likely configurations,'
230 ' and saved them to configuration files.')
Spatiotemporal scoring in IMP.
def prepare_protein_library
Function that reads in experimental stoichiometry data and calculates which compositions and location assignments...