# doc/ref/xl__datasplitter_8py_source.html

#!/usr/bin/env python

"""@namespace IMP.nestor.xl_datasplitter

   Script to split a CSV file for use in nested sampling"""


import os
import random
import sys


# Probability with which each crosslink is assigned to the evidence-calculation
# subset; the remaining crosslinks go to the sampling subset.
perc_to_evi = 0.7

# A line starting with one of these prefixes is treated as the CSV header.
HEADER_PREFIXES = ("Protein1", "Linker")


def read_crosslinks(path):
    """Read a crosslink CSV file.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(header, links)`` tuple. ``header`` is the last line that starts
        with one of ``HEADER_PREFIXES`` (``None`` if there is no such line);
        ``links`` is the list of all other non-blank lines, in file order and
        with their line endings kept.
    """
    header = None
    links = []
    with open(path, "r") as xlf:
        for ln in xlf:
            if ln.startswith(HEADER_PREFIXES):
                header = ln
            elif ln.strip():
                # Blank lines (e.g. a trailing empty line) are not crosslinks
                # and must not end up in either output file.
                links.append(ln)
    return header, links


def split_crosslinks(links, evi_fraction=perc_to_evi, rng=None):
    """Randomly partition crosslinks into sampling and evidence subsets.

    One random number in [0, 1) is drawn per crosslink; the crosslink goes to
    the evidence-calculation subset when the draw is below ``evi_fraction``
    and to the sampling subset otherwise. Input order is preserved within
    each subset.

    Args:
        links: Iterable of crosslink lines.
        evi_fraction: Per-crosslink probability of assignment to the
            evidence-calculation subset.
        rng: Optional object with a ``random()`` method (for example a seeded
            ``random.Random``) for reproducible splits. Defaults to the
            module-level ``random`` generator.

    Returns:
        A ``(sampling, evi_calc)`` tuple of lists.
    """
    draw = random.random if rng is None else rng.random
    sampling, evi_calc = [], []
    for link in links:
        if draw() < evi_fraction:
            evi_calc.append(link)
        else:
            sampling.append(link)
    return sampling, evi_calc


def write_crosslinks(path, header, links):
    """Write ``header`` (when not ``None``) followed by ``links`` to ``path``."""
    with open(path, "w") as out:
        if header is not None:
            out.write(header)
        out.writelines(links)


def main(argv=None):
    """Split the crosslink CSV named on the command line into two files.

    ``sampling_<name>`` and ``evicalc_<name>`` are written next to the input
    file, each starting with the input's header line when one was found.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` is the
            path of the CSV file to split.

    Returns:
        A ``(sampling_path, evicalc_path)`` tuple with the files written.

    Raises:
        SystemExit: If no input file was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = os.path.basename(argv[0]) if argv else "xl_datasplitter.py"
        sys.exit(f"Usage: {prog} <crosslink_csv_file>")

    xl_file = argv[1]
    header, xls = read_crosslinks(xl_file)
    sampling, evi_calc = split_crosslinks(xls, perc_to_evi)

    # Output files are placed in the same directory as the input file.
    dir_path, fname = os.path.split(xl_file)
    sampling_path = os.path.join(dir_path, f"sampling_{fname}")
    evicalc_path = os.path.join(dir_path, f"evicalc_{fname}")
    write_crosslinks(sampling_path, header, sampling)
    write_crosslinks(evicalc_path, header, evi_calc)
    return sampling_path, evicalc_path


if __name__ == "__main__":
    main()