IMP logo
IMP Reference Guide  develop.63b38c487d,2024/12/22
The Integrative Modeling Platform
xl_datasplitter.py
1 #!/usr/bin/env python
2 """@namespace IMP.nestor.xl_datasplitter
3  Script to split a CSV file for use in nested sampling"""
4 
import os
import random
import sys
7 
# Probability with which each data line is assigned to the "evicalc" subset;
# the remaining lines go to the "sampling" subset.
perc_to_evi = 0.7


def read_crosslinks(xl_file):
    """Read a CSV file and separate its header line from its data lines.

    A line starting with "Protein1" or "Linker" is treated as the header.
    If several such lines are present, the last one is kept.

    @param xl_file Path of the CSV file to read.
    @return A tuple (header, xls): header is the header line, or None if
            the file has none; xls is the list of remaining lines, each
            with its original line ending.
    """
    xls = []
    header = None
    with open(xl_file, "r") as xlf:
        # Iterate the file lazily; there is no need to hold a second copy
        # of every line as readlines() would.
        for ln in xlf:
            if ln.startswith(("Protein1", "Linker")):
                header = ln
            else:
                xls.append(ln)
    return header, xls


def split_crosslinks(xls, evi_fraction=perc_to_evi):
    """Randomly partition lines into sampling and evicalc subsets.

    Each line is assigned independently: it goes to the evicalc subset
    when a draw from random.random() is below evi_fraction, otherwise to
    the sampling subset. The split sizes are therefore only approximately
    proportional to evi_fraction.

    @param xls Iterable of lines to partition.
    @param evi_fraction Probability of assigning a line to evicalc.
    @return A tuple (sampling, evi_calc) of lists, each preserving the
            input order.
    """
    sampling, evi_calc = [], []
    for link in xls:
        if random.random() < evi_fraction:
            evi_calc.append(link)
        else:
            sampling.append(link)
    return sampling, evi_calc


def write_crosslinks(path, header, links):
    """Write an optional header followed by the given lines to path.

    @param path Output file path; an existing file is overwritten.
    @param header Header line to write first, or None to omit it.
    @param links Lines to write; they are written as-is, so they are
           expected to carry their own line endings.
    """
    with open(path, "w") as out:
        if header is not None:
            out.write(header)
        out.writelines(links)


def main(argv=None):
    """Split the CSV file named on the command line into two files.

    The outputs are written next to the input file and are named
    sampling_<name> and evicalc_<name>, where <name> is the input file's
    base name.

    @param argv Argument vector (program name first); defaults to
           sys.argv.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("Usage: xl_datasplitter.py <crosslink_csv_file>")
    xl_file = argv[1]

    header, xls = read_crosslinks(xl_file)
    sampling, evi_calc = split_crosslinks(xls)

    # os.path handles the platform's path separator; an empty dir_path
    # (bare file name) makes the outputs land in the current directory.
    dir_path, fname = os.path.split(xl_file)
    write_crosslinks(
        os.path.join(dir_path, f"sampling_{fname}"), header, sampling)
    write_crosslinks(
        os.path.join(dir_path, f"evicalc_{fname}"), header, evi_calc)


if __name__ == "__main__":
    main()