IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
xl_datasplitter.py
1 """@namespace IMP.nestor.xl_datasplitter
2  Script to split a CSV file for use in nested sampling"""
3 
import os
import random
import sys
6 
# Probability with which each crosslink is assigned to the
# evidence-calculation set; the remaining links go to the sampling set.
perc_to_evi = 0.7

# A line starting with one of these prefixes is treated as the CSV header.
_HEADER_PREFIXES = ('Protein1', 'Linker')


def read_crosslinks(xl_file):
    """Read a crosslink CSV file, separating the header from the data.

    @param xl_file path of the CSV file to read
    @return a (header, links) tuple. header is the last line that starts
            with 'Protein1' or 'Linker' (None if there is no such line);
            links is the list of the remaining non-blank lines, in file
            order, each still carrying its line terminator.
    """
    header = None
    links = []
    with open(xl_file, 'r') as xlf:
        # Iterate the file lazily instead of materializing readlines().
        for ln in xlf:
            if ln.startswith(_HEADER_PREFIXES):
                header = ln
            elif ln.strip():
                # Whitespace-only lines (e.g. trailing newlines at the end
                # of the file) are not crosslinks; skipping them keeps
                # them out of the random split and out of the outputs.
                links.append(ln)
    return header, links


def split_crosslinks(links, evi_fraction=perc_to_evi, rng=None):
    """Randomly partition crosslinks into sampling and evidence sets.

    One random number is drawn per link, in order; the link goes to the
    evidence-calculation set if the draw is below evi_fraction, otherwise
    to the sampling set. Relative order of links is preserved in both.

    @param links iterable of crosslink lines
    @param evi_fraction probability of assigning a link to the
           evidence-calculation set
    @param rng optional random.Random-like object providing random();
           the module-level random generator is used when None
    @return a (sampling, evi_calc) tuple of lists
    """
    draw = random.random if rng is None else rng.random
    sampling, evi_calc = [], []
    for link in links:
        if draw() < evi_fraction:
            evi_calc.append(link)
        else:
            sampling.append(link)
    return sampling, evi_calc


def write_crosslinks(path, header, links):
    """Write the header (if any) followed by the given crosslink lines.

    @param path path of the file to create or overwrite
    @param header header line to write first, or None to write none
    @param links iterable of crosslink lines, written verbatim
    """
    with open(path, 'w') as out:
        if header is not None:
            out.write(header)
        out.writelines(links)


def main(argv=None):
    """Split the crosslink CSV file named on the command line.

    Writes 'sampling_<name>' and 'evicalc_<name>' next to the input file.

    @param argv list of command-line arguments (without the program
           name); sys.argv[1:] is used when None. Only the first
           argument is used; any others are ignored.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Exit with a readable message rather than an IndexError traceback.
        sys.exit('Usage: xl_datasplitter.py <crosslink_csv_file>')
    xl_file = args[0]

    header, links = read_crosslinks(xl_file)
    sampling, evi_calc = split_crosslinks(links)

    # os.path handles bare file names (empty directory part) and
    # platform-specific separators, unlike manual splitting on '/'.
    dir_path, fname = os.path.split(xl_file)
    write_crosslinks(os.path.join(dir_path, f'sampling_{fname}'),
                     header, sampling)
    write_crosslinks(os.path.join(dir_path, f'evicalc_{fname}'),
                     header, evi_calc)


if __name__ == '__main__':
    main()