doc/html/emagefit_8py_source.html

#!/usr/bin/env python


import ConfigParser

import sys

import os

import time

import logging

log = logging.getLogger("domino_model")


import IMP

import IMP.atom as atom

import IMP.algebra as alg

import IMP.em2d as em2d


import IMP.em2d.Database as Database

import IMP.em2d.imp_general.io as io

import IMP.em2d.imp_general.alignments as alignments

import IMP.em2d.imp_general.representation as representation

import IMP.em2d.DominoModel as DominoModel

import IMP.em2d.MonteCarloRelativeMoves as MonteCarloRelativeMoves

import IMP.em2d.sampling as sampling

import IMP.em2d.solutions_io as solutions_io

import IMP.em2d.utility as utility


def setup_sampling_schema(model, params):

    """

       Set the discrete states for sampling for DOMINO.

       @param model A DominoModel object

       @param params See the help for the script

    """

    n_rbs = len(model.components_rbs)

    sampling_schema = sampling.SamplingSchema(n_rbs, params.fixed, params.anchor)


    sampling_schema.read_from_database( params.sampling_positions.read,

                                            ["reference_frames"],

                                            params.sampling_positions.max_number,

                                            params.sampling_positions.orderby)

    for i, rb in enumerate(model.components_rbs):

        model.set_rb_states(rb, sampling_schema.get_sampling_transformations(i))


def write_pdbs_for_solutions(params, fn_database, n=10, orderby="em2d",

                    split_components=False):

    """

        Write PDBs for the solutions in the database

        @param params class with the parameters for the experiment

        @param fn_database File containing the database of solutions

        @param n Number of solutions to write

        @param orderby Restraint used for sorting the solutions

        @param split_components If true, each component of the assembly in a

                            solution is written as a separate PDB file

    """

    m = DominoModel.DominoModel()

    m.set_assembly_components(params.fn_pdbs, params.names)

    db = solutions_io.ResultsDB()

    db.connect(fn_database)

    data = db.get_solutions(["reference_frames"], n, orderby)

    db.close()

    for i, d in enumerate(data):

        texts = d[0].split("/") # each row has only one value

        log.debug("Reference frames to generate pdb %s",texts)

        RFs = [io.TextToReferenceFrame(t).get_reference_frame() for t in texts]

        if not split_components:

            fn = "solution-%03d.pdb" % i

            m.write_pdb_for_reference_frames(RFs,fn)

        else:

            fn = "solution-%03d-" % i

            m.write_pdbs_for_reference_frames(RFs,fn)


def write_nth_largest_cluster(params, fn_database, fn_db_clusters,

                                        position, table_name="clusters"):

    """

        @param params class with the parameters for the experiment

        @param fn_database Database file with the reference frames of the

                    solutions

        @param fn_db_clusters Database file for the clusters.

                        It can the same as fn_database or a different one.

        @param table_name Table with the info for the clusters in

                     fn_db_clusters

        @param position Cluster position (by number of elements) requested.

                                           The index of largest cluster is 1.

    """

    m = DominoModel.DominoModel()

    m.set_assembly_components(params.fn_pdbs, params.names)

    # get cluster

    db_clusters = solutions_io.ResultsDB()

    db_clusters.connect(fn_db_clusters)

    cl_record = db_clusters.get_nth_largest_cluster(position)

    log.info("Size of %s largest cluster: %s",position, cl_record.n_elements)

    # get solutions

    db = solutions_io.ResultsDB()

    db.connect(fn_database)

    fields = ("reference_frames",)

    solutions_ids = map(int, cl_record.solutions_ids.split("|"))

    solutions = db.get_solutions_from_list(fields, solutions_ids)

    db.close()

    for i, d in zip(solutions_ids, solutions):

        texts = d[0].split("/")

        RFs = [io.TextToReferenceFrame(t).get_reference_frame() for t in texts]

        fn = "cluster-%02d-solution-%03d.pdb" % (position, i)

        log.debug(fn)

        m.write_pdb_for_reference_frames(RFs, fn)


def generate_domino_model(params, fn_database, fn_log = None):

    """

        Generate a model for an assembly using DOMINO.

        @param params Class with the parameters for the experiment

        @param fn_database Databse file that will contain the solutions

                    SQLite format

    """

    log.info(io.imp_info([IMP, em2d]))

    t0 = time.time()

    m = DominoModel.DominoModel()

    if hasattr(params, "test_opts") and params.test_opts.do_test:

        m.set_assembly(params.test_opts.test_fn_assembly, params.names)

    else:

        m.set_assembly_components(params.fn_pdbs, params.names)

    setup_sampling_schema(m, params)

    if hasattr(params.sampling_positions, "align_before_domino") and \

                params.sampling_positions.align_before_domino:

        m.align_rigid_bodies_states()


    if hasattr(params, "benchmark"):

        m.set_native_assembly_for_benchmark(params)

    set_pair_score_restraints(params, m)

    set_xlink_restraints(params, m)

    set_geometric_complementarity_restraints(params, m)

    if hasattr(params.domino_params,"fn_merge_tree"):

        fn = params.domino_params.fn_merge_tree

        if not os.path.exists(fn):

            raise IOError("merge tree file not found: %s" % fn)

        m.read_merge_tree(fn)

    else:

        m.create_merge_tree()

    set_connectivity_restraints(params, m)

    set_pairs_excluded_restraint(params, m)

    set_em2d_restraints(params, m)

    m.setup_domino_sampler()

    m.do_sampling("assignments_heap_container", params.domino_params)

    tf = time.time()

    log.info("Total time for the sampling (non-parallel): %s",tf-t0)

    m.write_solutions_database(fn_database, params.n_solutions)


def print_restraints(params):

    """

        Generate a model for an assembly using DOMINO.

        @param params Class with the parameters for the experiment

        @param fn_database Databse file that will contain the solutions

                    SQLite format

    """

    log.info(io.imp_info([IMP, em2d]))

    t0 = time.time()

    m = DominoModel.DominoModel()

    if hasattr(params, "test_opts") and params.test_opts.do_test:

        m.set_assembly(params.test_opts.test_fn_assembly, params.names)

    else:

        m.set_assembly_components(params.fn_pdbs, params.names)

    if hasattr(params, "benchmark"):

        m.set_native_assembly_for_benchmark(params)

    set_pair_score_restraints(params, m)

    set_xlink_restraints(params, m)

    set_geometric_complementarity_restraints(params, m)

    set_connectivity_restraints(params, m)

    set_pairs_excluded_restraint(params, m)

    set_em2d_restraints(params, m)

    log.debug("RESTRAINTS FOR THE INPUT CONFIGURATION")

    DominoModel.print_restraints_values(m.model)


def set_pair_score_restraints(params, model):

    """ Set the pair score restraints in the DominoModel

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params, "pair_score_restraints"):

        model.create_coarse_assembly(params.n_residues)

        for r_params in params.pair_score_restraints:

            model.set_pair_score_restraint(*r_params)


def set_xlink_restraints(params, model):

    """ Set the cross-linking restraints in the DominoModel

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params, "xlink_restraints"):

        for params in params.xlink_restraints:

            model.set_xlink_restraint(*params)


def set_geometric_complementarity_restraints(params, model):

    """ Set the geometric complementarity restraints in the DominoModel

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params, "complementarity_restraints"):

        model.create_coarse_assembly(params.n_residues)

        for r_params in params.complementarity_restraints:

            model.set_complementarity_restraint(*r_params)


def set_connectivity_restraints(params, model):

    """ Set the connectivity restraints in the DominoModel

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params,"connectivity_restraints"):

        model.create_coarse_assembly(params.n_residues)

        for r_params in params.connectivity_restraints:

            model.set_connectivity_restraint(*args)


def set_pairs_excluded_restraint(params, model) :

    """

        All against all excluded volume restraints

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params,"pairs_excluded_restraint"):

        model.create_coarse_assembly(params.n_residues)

        model.set_close_pairs_excluded_volume_restraint(

                                            *params.pairs_excluded_restraint)


def set_em2d_restraints(params, model):

    """

        Restraints related to the match to EM images

        @param model DominoModel class

        @param params See the help for the script

    """

    if hasattr(params,"em2d_restraints"):

        for r_params in params.em2d_restraints:

            model.set_em2d_restraint(*r_params)


def generate_monte_carlo_model(params, fn_database, seed,

                               write_solution=False, fn_log = None):

    """

        Generate a model for an assembly using MonteCarlo optimization

        @param params Class with the parameters for the experiment

        @param fn_database Datbase file where the solutions will be written.

                            SQLite format.

        @param write_solution If True, writes a PDB with the final model

        @param fn_log File for logging. If the value is None, no file is written

    """

    log.info(io.imp_info([IMP, em2d]))

    m = DominoModel.DominoModel()

    m.set_assembly_components(params.fn_pdbs, params.names)

    set_pair_score_restraints(params, m)

    set_xlink_restraints(params, m)

    set_geometric_complementarity_restraints(params, m)

    set_connectivity_restraints(params, m)

    set_pairs_excluded_restraint(params, m)

    set_em2d_restraints(params, m )

    if hasattr(params, "benchmark"):

        m.set_native_assembly_for_benchmark(params)


    MonteCarloRelativeMoves.set_random_seed(seed)

    mc = MonteCarloRelativeMoves.MonteCarloRelativeMoves(m.model,

                                                m.components_rbs, params.anchor)

    mc.set_temperature_pattern(params.monte_carlo.temperatures,

                                  params.monte_carlo.iterations,

                                  params.monte_carlo.cycles)

    mc.set_moving_parameters(params.monte_carlo.max_translations,

                                params.monte_carlo.max_rotations)

    if not hasattr(params,"dock_transforms"):

        mc.dock_transforms = []

    else:

        mc.dock_transforms = params.dock_transforms


    # Probability for a component of the assembly of doing random movement

    # of doing a relative movement respect to another component

    mc.non_relative_move_prob = params.monte_carlo.non_relative_move_prob

    mc.run_monte_carlo_with_relative_movers()

    m.write_monte_carlo_solution(fn_database)

    if write_solution:

        atom.write_pdb(m.assembly, fn_database + ".pdb")


def create_dockings_from_xlinks(params):

    """

        Perform dockings that satisfy the cross-linking restraints.

        1) Based on the number of restraints, creates an order for the

           docking between pairs of subunits, favouring the subunits with

           more crosslinks to be the "receptors"

        2) Moves the subunits that are going to be docked to a position that

           satisfies the x-linking restraints. There is no guarantee that this

           position is correct. Its purpose is to help the docking

           algorithm with a clue of the proximity/orientation between subunits

        3) Performs docking between the subunits

        4) Filters the results of the docking that are not consistent with

           the cross-linking restraints

        5) Computes the relative transformations between the rigid bodies

           of the subunits that have been docked

        @param params Class with the parameters for the experiment

    """

    log.info("Creating initial assembly from xlinks and docking")

    import emagefit_dock as dock

    import IMP.em2d.buildxlinks as bx

    m = DominoModel.DominoModel()

    m.set_assembly_components(params.fn_pdbs, params.names)

    set_xlink_restraints(params, m)

    order = bx.DockOrder()

    order.set_xlinks(m.xlinks_dict)

    docking_pairs = order.get_docking_order()


    if hasattr(params, "have_hexdock"):

        if not params.have_hexdock:

            return


    for rec, lig in docking_pairs:

        xlinks_list = m.xlinks_dict.get_xlinks_for_pair((rec,lig))

        log.debug("Xlinks for the pair %s %s",rec, lig)

        for xl in xlinks_list:

            log.debug("%s", xl.show())


        h_receptor = representation.get_component(m.assembly, rec)

        h_ligand = representation.get_component(m.assembly, lig)

        rb_receptor = representation.get_rigid_body(m.components_rbs,

                                         representation.get_rb_name(rec))

        rb_ligand = representation.get_rigid_body(m.components_rbs,

                                         representation.get_rb_name(lig))

        initial_ref = rb_ligand.get_reference_frame()

        # move to the initial docking position

        mv = bx.InitialDockingFromXlinks()

        mv.set_xlinks(xlinks_list)

        mv.set_hierarchies(h_receptor, h_ligand)

        mv.set_rigid_bodies(rb_receptor, rb_ligand)

        mv.move_ligand()

        fn_initial_docking = "%s-%s_initial_docking.pdb" % (rec,lig)

        mv.write_ligand(fn_initial_docking)

        # dock

        dock.check_for_hexdock()

        hex_docking = dock.HexDocking()

        receptor_index = params.names.index(rec)

        fn_transforms = "hex_solutions_%s-%s.txt" % (rec, lig)

        fn_docked = "%s-%s_hexdock.pdb" % (rec, lig)

        hex_docking.dock(params.fn_pdbs[receptor_index],

                         fn_initial_docking, fn_transforms, fn_docked, False)


        sel = atom.ATOMPDBSelector()

        new_m = IMP.Model()

        # After reading the file with the initial solution, the reference frame

        # for the rigid body of the ligand is not necessarily the  same one

        # that it had when saved.

        # Thus reading the file again ensures  consisten results when

        # using the HEXDOCK transforms

        new_h_ligand =  atom.read_pdb(fn_initial_docking, new_m, sel)

        new_rb_ligand = atom.create_rigid_body(new_h_ligand)

        Tlig = new_rb_ligand.get_reference_frame().get_transformation_to()

        fn_filtered = "hex_solutions_%s-%s_filtered.txt" % (rec, lig)

        # new_h_ligand contains the coordinates of the ligand after moving it

        # to the initial position for the docking

        dock.filter_docking_results(h_receptor, new_h_ligand, xlinks_list,

                                            fn_transforms, fn_filtered)

        # transforms to apply to the ligand as it is in the file

        # fn_initial_docking

        transformations_hexdock = dock.read_hex_transforms(fn_filtered)


        # The initial position the ligand, by construction, is a "docking"

        # solution, very bad probably, but is an starting point for Monte Carlo

        # refinement. Hexdock therefore would "apply" an identity transformation

        log.info("Adding the initial approximate transformation to the " \

                " set of possible transformations")

        transformations_hexdock.append(alg.get_identity_transformation_3d())


        Trec = rb_receptor.get_reference_frame().get_transformation_to()

        Tinternal = []

        for i,T in enumerate(transformations_hexdock):

            Tdock = alg.compose(T, Tlig)

            ref = alg.ReferenceFrame3D(Tdock)

            new_rb_ligand.set_reference_frame(ref)

            # internal transformation. The relationship is Tdock = Trec * Ti

            Ti = alg.compose(Trec.get_inverse(), Tdock)

            Tinternal.append(Ti)

        fn_relative = "relative_positions_%s-%s.txt" % (rec, lig)

        io.write_transforms(Tinternal, fn_relative)

        rb_ligand.set_reference_frame(initial_ref)

        #os.remove(fn_initial_docking)


if __name__ == "__main__":

    import ConfigParser

    cparser = ConfigParser.SafeConfigParser()


    parser = IMP.OptionParser(description="Domino grid sampling",

                              imp_module=em2d)

    parser.add_option("--o",

                    dest="fn_database",

                    default = False,

                    help="SQLite file. This file is an output when modeling. " \

                        " It is an input parameter when querying the database.")

    parser.add_option("--exp",

                     dest="fn_params",

                     default=None,

                     help="Experiment file. This should be a Python file "

                          "containing a class called Experiment containing "

                          "all parameters required to do the modeling.")

    parser.add_option("--w",

                    type=int,

                     dest="write",

                      default=None,

                     help="write the best scored solutions to PDB files.")

    parser.add_option("--wcl",

                     dest="write_cluster",

                     metavar="fn_db_clusters position",

                     nargs = 2,

                     default=None,

                     help="Write the n-th largest cluster. This option has " \

                            "two arguments. The first one is the database file " \

                            "containing the clusters. The second one is the " \

                            "number (position) of the cluster to write. " \

                            "The index of the first cluster is 1.")

    parser.add_option("--orderby",

                      dest="orderby",

                      default=None,

                      help="Measure used to sort solutions when saving them " \

                      "to PDB")

    parser.add_option("--monte_carlo", dest="monte_carlo",

                      metavar = "number",

                      type=int,

                      default=False,

                      help="Run a MonteCarlo optimization. There is one " \

                       "parameter passed to the option. When using this " \

                       "script in a cluster and sending a lot of them at the "\

                       "same time, the effective random number obtained by " \

                       "seeding using the time is the same for all scripts. " \

                       "To avoid that situation, you can pass an integer. " \

                       "If the number is -1, the time is " \

                       "used as the seed (useful for one core)")

    parser.add_option("--dock", dest="dock",

                    action="store_true",

                    default=False,

                    help="Perform docking of the components of the assembly "\

                         "guided by cross-linking restraints")

    parser.add_option("--merge", dest="merge",

                  action="callback",

                    callback=utility.vararg_callback,

                    help="Merge the database files")

    parser.add_option("--gather", dest="gather",

                  action="callback",

                    callback=utility.vararg_callback,

                    help="Gather database files and store then in a single " \

                         "file. The arguments are the names of the databases "\

                         "to gather.")

    parser.add_option("--log",

                     dest="log",

                     default=None,

                     help="File for logging.")

    parser.add_option("--cdrms",

                     dest="cdrms",

                     type=int,

                     default=False,

                     metavar="number",

                     help="Align the best solutions to the " \

                            "native using the DRMS of the centroids.")


    parser.add_option("--pr",

                    action="store_true",

                     dest="pr_rest",

                     default=False,

                     help="Print restraints of model and native")


    args = parser.parse_args()

    args = args[0]

    if len(sys.argv) == 1:

        parser.print_help()

        quit()

    if args.log:

        logging.basicConfig(filename=args.log, filemode="w")

    else:

        logging.basicConfig(stream=sys.stdout)

    logging.root.setLevel(logging.INFO)


    if args.merge:

        max_number_of_results = 100000

        t0 = time.time()

        if not args.fn_database or not args.orderby:

            raise ValueError("Merging databases requires the output database " \

                          "results and the name of the restraint to order by")

        solutions_io.gather_best_solution_results(args.merge, args.fn_database,

                max_number=max_number_of_results, orderby=args.orderby)

        log.info("Merging done. Time %s", time.time() - t0)

        quit()

    if args.gather:

        t0 = time.time()

        solutions_io.gather_solution_results(args.gather, args.fn_database)

        log.info("Gathering done. Time %s", time.time() - t0)

        quit()


    if args.write:

        if not args.fn_database or not args.orderby:

            raise ValueError("Writing solutions requires the database of " \

                         "results and the name of the restraint to order by")

        if args.orderby == "False":

            args.orderby = False

        params = utility.get_experiment_params(args.fn_params)

        write_pdbs_for_solutions(params, args.fn_database,

                                 args.write, args.orderby, )

        quit()


    if args.write_cluster:

        if not args.fn_database:

            raise ValueError("Writing clusters requires the database file")

        fn_db_clusters = args.write_cluster[0]

        position = int(args.write_cluster[1])

        params = utility.get_experiment_params(args.fn_params)

        write_nth_largest_cluster(params,  args.fn_database,

                                  fn_db_clusters, position )

        quit()


    if args.cdrms:

        if not args.fn_database or not args.orderby:

            raise ValueError("Writing models requires the database of " \

                            "results and the name of the restraint to order by")

        params = utility.get_experiment_params(args.fn_params)


        write_pdbs_aligned_by_cdrms(params, args.fn_database,

                                    args.cdrms, args.orderby)

        quit()


    if args.monte_carlo:

        params = utility.get_experiment_params(args.fn_params)

        generate_monte_carlo_model(params,

                                   args.fn_database, args.monte_carlo)

        quit()


    if args.dock:

        params = utility.get_experiment_params(args.fn_params)

        create_dockings_from_xlinks(params)

        quit()


    if(args.pr_rest):

        params = utility.get_experiment_params(args.fn_params)

        print_restraints(params)

        quit()


    params = utility.get_experiment_params(args.fn_params)

    generate_domino_model(params, args.fn_database)