home
about
news
download
doc
source
systems
tests
bugs
contact
IMP Reference Guide
2.22.0
The Integrative Modeling Platform
IMP Manual
Reference Guide
Tutorial Index
Modules
Classes
Examples
lib
IMP
nestor
version 2.22.0
xl_datasplitter.py
1
#!/usr/bin/env python
2
"""@namespace IMP.nestor.xl_datasplitter
3
Script to split a CSV file for use in nested sampling"""
4
5
import os
import random
import sys
7
8
# Probability with which each crosslink line is written to the
# ``evicalc_`` file; the remaining lines go to the ``sampling_`` file.
perc_to_evi = 0.7

# A line starting with one of these prefixes is treated as the CSV header
# rather than as a crosslink.
_HEADER_PREFIXES = ("Protein1", "Linker")


def _read_crosslinks(xl_file):
    """Read the crosslink CSV file at `xl_file`.

    Returns a ``(header, crosslinks)`` tuple. ``header`` is the last line
    that starts with ``Protein1`` or ``Linker`` (``None`` if no line does);
    ``crosslinks`` holds every other line, in file order, with line endings
    left intact.
    """
    header = None
    crosslinks = []
    with open(xl_file, "r") as xlf:
        for ln in xlf:
            if ln.startswith(_HEADER_PREFIXES):
                header = ln
            else:
                crosslinks.append(ln)
    return header, crosslinks


def _split_crosslinks(crosslinks, evi_fraction):
    """Randomly partition `crosslinks` into two lists.

    Each line is independently placed in the ``evi_calc`` list with
    probability `evi_fraction`, otherwise in the ``sampling`` list.
    Relative order is preserved within each list.

    Returns a ``(sampling, evi_calc)`` tuple.
    """
    sampling, evi_calc = [], []
    for link in crosslinks:
        # Exactly one draw per line, in file order, so a seeded run of the
        # `random` module gives a reproducible split.
        if random.random() < evi_fraction:
            evi_calc.append(link)
        else:
            sampling.append(link)
    return sampling, evi_calc


def _write_subset(path, header, crosslinks):
    """Write `header` (when not ``None``) followed by `crosslinks` to `path`."""
    with open(path, "w") as out:
        if header is not None:
            # A header taken from an unterminated last line would otherwise
            # be fused with the first crosslink.
            out.write(header if header.endswith("\n") else header + "\n")
        out.writelines(crosslinks)


def main(argv=None):
    """Split a crosslink CSV file into ``sampling_`` and ``evicalc_`` files.

    `argv` defaults to ``sys.argv``; ``argv[1]`` is the path of the input
    CSV file. Two files named ``sampling_<name>`` and ``evicalc_<name>`` are
    written next to the input file, each starting with the input's header
    line if one was found.

    Exits with a usage message if no input file is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else "xl_datasplitter.py"
        sys.exit(f"Usage: {prog} <crosslink_csv_file>")

    xl_file = argv[1]
    header, xls = _read_crosslinks(xl_file)
    sampling, evi_calc = _split_crosslinks(xls, perc_to_evi)

    # os.path handles both "/" and the platform's native separator; an empty
    # directory part makes join() return the bare file name, i.e. the
    # current directory.
    dir_path, fname = os.path.split(xl_file)
    _write_subset(os.path.join(dir_path, f"sampling_{fname}"),
                  header, sampling)
    _write_subset(os.path.join(dir_path, f"evicalc_{fname}"),
                  header, evi_calc)


if __name__ == "__main__":
    main()