home
about
news
download
doc
source
systems
tests
bugs
contact
IMP Reference Guide
develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
IMP Manual
Reference Guide
Tutorial Index
Modules
Classes
Examples
lib
IMP
nestor
version 20241121.develop.d97d4ead1f
xl_datasplitter.py
1
#!/usr/bin/env python
"""@namespace IMP.nestor.xl_datasplitter
Script to split a CSV file for use in nested sampling

Usage: xl_datasplitter.py <csv_file>

Every non-header line of the input file is assigned at random to one of
two new files written alongside the input: ``evicalc_<name>`` (with
probability `perc_to_evi`) or ``sampling_<name>`` (otherwise). The header
line, if the input has one, is copied to the top of both output files.
"""

import os
import random
import sys

# Probability that any given data line is routed to the evicalc file;
# the remaining lines go to the sampling file.
perc_to_evi = 0.7

# A line starting with one of these prefixes is treated as the header.
_HEADER_PREFIXES = ("Protein1", "Linker")


def _read_links(path):
    """Read `path` and return ``(header, links)``.

    `header` is the header line (or None if the file has none) and
    `links` is the list of all other lines, in file order. Lines are
    returned unmodified, including their line terminators.
    """
    header = None
    links = []
    with open(path, "r") as xlf:
        for ln in xlf:
            if ln.startswith(_HEADER_PREFIXES):
                # If several lines look like headers, the last one wins.
                header = ln
            else:
                links.append(ln)
    return header, links


def _split_links(links, evi_fraction):
    """Randomly partition `links` into ``(sampling, evi_calc)`` lists.

    One random number is drawn per link; the link goes to `evi_calc`
    when the draw is below `evi_fraction`, otherwise to `sampling`.
    The relative order of the links is preserved within each list.
    """
    sampling, evi_calc = [], []
    for link in links:
        if random.random() < evi_fraction:
            evi_calc.append(link)
        else:
            sampling.append(link)
    return sampling, evi_calc


def _write_links(path, header, links):
    """Write `header` (unless it is None) followed by `links` to `path`."""
    with open(path, "w") as out:
        if header is not None:
            out.write(header)
        out.writelines(links)


def main(argv=None):
    """Split the CSV file named by the first command line argument.

    @param argv list of command line arguments, excluding the program
           name; defaults to ``sys.argv[1:]``. Only the first argument
           is used; any further arguments are ignored.

    Exits with a usage message (non-zero status) if no file is given.
    Errors from opening, reading or writing the files propagate to the
    caller.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("Usage: xl_datasplitter.py <csv_file>")
    xl_file = args[0]

    header, xls = _read_links(xl_file)
    sampling, evi_calc = _split_links(xls, perc_to_evi)

    # os.path handles every separator the platform accepts, and yields an
    # empty directory (i.e. the current one) for a bare file name.
    dir_path, fname = os.path.split(xl_file)
    _write_links(os.path.join(dir_path, "sampling_" + fname),
                 header, sampling)
    _write_links(os.path.join(dir_path, "evicalc_" + fname),
                 header, evi_calc)


if __name__ == "__main__":
    main()