doc/html/shared__functions_8py_source.html

#!/usr/bin/env python

import sys,os

from glob import glob


import IMP

import IMP.atom

import IMP.container

import IMP.saxs

import IMP.core

import IMP.isd

import IMP.isd.TBLReader

import IMP.isd.TALOSReader

import IMP.isd.utils

from IMP.isd.Entry import Entry


from math import sqrt,exp

from StringIO import StringIO


from random import random


kB= (1.381 * 6.02214) / 4184.0


class PyMDMover:

    def __init__(self, cont, md, n_md_steps):

        self.cont = cont

        self.md = md

        self.n_md_steps = n_md_steps

        self.velkeys=[IMP.FloatKey("vx"),

                    IMP.FloatKey("vy"), IMP.FloatKey("vz")]


    def store_move(self):

        self.oldcoords=[]

        #self.oldvel=[]

        for i in xrange(self.cont.get_number_of_particles()):

            p=self.cont.get_particle(i)

            #self.oldvel.append([p.get_value(k) for k in self.velkeys])

            d=IMP.core.XYZ(p)

            self.oldcoords.append(d.get_coordinates())


    def propose_move(self, prob):

        self.md.optimize(self.n_md_steps)


    def reset_move(self):

        for i in xrange(self.cont.get_number_of_particles()):

            p=self.cont.get_particle(i)

            #for j,k in enumerate(self.velkeys):

            #    p.set_value(k, self.oldvel[i][j])

            d=IMP.core.XYZ(p)

            d.set_coordinates(self.oldcoords[i])


    def get_number_of_steps(self):

        return self.n_md_steps


    def set_number_of_steps(self, nsteps):

        self.n_md_steps = nsteps


class PyMC(IMP.Optimizer):

    debug =True


    def __init__(self,model):

        self.m=model


    def add_mover(self,mv):

        self.mv = mv


    def set_kt(self,kT):

        self.kT=kT


    def set_return_best(self,thing):

        pass

    def set_move_probability(self,thing):

        pass


    def get_energy(self):

        pot=self.m.evaluate(False)

        #pot=self.get_restraints().evaluate(False)

        kin=self.mv.md.get_kinetic_energy()

        return pot+kin


    def metropolis(self, old, new):

        deltaE=new-old

        if self.debug:

            print ": old %f new %f deltaE %f new_epot: %f" % (old,new,deltaE,

                self.m.evaluate(False)),

        kT=self.kT

        if deltaE < 0:

            return True

        else:

            return exp(-deltaE/kT) > random()


    def optimize(self,nsteps):

        self.naccept = 0

        print "=== new MC call"

        #store initial coordinates

        self.mv.store_move()

        for i in xrange(nsteps):

            print "MC step %d " % i,

            #draw new velocities

            self.mv.md.assign_velocities(self.kT / kB)

            #get total energy

            old=self.get_energy()

            #make a MD move

            self.mv.propose_move(1)

            #get new total energy

            new=self.get_energy()

            if self.metropolis(old,new):

                #move was accepted: store new conformation

                self.mv.store_move()

                self.naccept += 1

                print "accepted "

            else:

                #move rejected: restore old conformation

                self.mv.reset_move()

                print " "


    def get_number_of_forward_steps(self):

        return self.naccept


class sfo_common:

    """nonspecific methods used across all shared function objects.

    Rules:

        - Their name starts with the name of the parent function (e.g.

                init_model_* )

        - they don't store anything in the class, but instead

                return all created objects.

                Exceptions: the model, which is self._m

                            the statistics class, which is self.stat

        - they store what they have to store in the model (e.g. restraints)

        - they don't print out anything except for long routines (e.g. NOE

          parsing)

        - the prior RestraintSet is added to the model when it is created, so

          that when it is passed to another function, it is not added twice.

    """


    def hello(self):

        return "hello world"


    def set_checklevel(self,value):

        IMP.base.set_check_level(value)


    def set_loglevel(self,value):

        IMP.base.set_log_level(value)


    def m(self,name,*args,**kw):

        "wrapper to call methods of m"

        func=getattr(self._m,name)

        return func(*args, **kw)


    def init_model_base(self, wd):

        "moves to wd and creates model"

        os.chdir(wd)

        # Create an IMP model

        self._m = IMP.Model()


    def init_model_charmm_protein_and_ff(self, initpdb, top, par, selector, pairscore,

            ff_temp=300.0, disulfides=None, representation='custom'):

        """creates a CHARMM protein representation.

        creates the charmm force field, bonded and nonbonded.

        - initpdb: initial structure in pdb format

        - top is a CHARMM top.lib, read if representation=='custom' (default)

        - par is a CHARMM par.lib

        - selector is an instance of

            one of the selectors of IMP.atom, for example

            IMP.atom.NonWaterNonHydrogenPDBSelector().

        - pairscore is an instance of a Pair Score to score the interaction

            between two atoms. usually, it's either

            LennardJonesDistancePairScore(0,1) or

            RepulsiveDistancePairScore(0,1)

        - ff_temp is the temperature at which the force field should be

          simulated.

        - disulfides: if not None, a list of tuples corresponding to residue

                      numbers that should be cross-linked. Residues should be

                      cysteines, and residue numbering should start at 0.

        - representation: 'full' : all-atom CHARMM force field

                          'heavy': heavy atom CHARMM forcefield with polar H

                          'calpha': only C alphas, ball and stick model with

                                    bondlength 3.78 angstrom, beads at VdW

                                    contact, and harmonic restraint between

                                    them.

                          'custom' (default) : read given CHARMM top and par

                                               files.

        Returns: prot, ff, rsb, rs

            - prot: the protein

            - ff: the force field

            - rsb: the RestraintSet on bonded interactions

            - rs: the RestraintSet on nonbonded interactions. Both are weighted

              using ff_temp.

        """

        m=self._m

        prot = IMP.atom.read_pdb(initpdb, m, selector)

        if not prot.get_is_valid(True):

            raise ValueError, "invalid hierarchy!"

        if representation == 'custom':

            # Read in the CHARMM heavy atom topology and parameter files

            ff = IMP.atom.CHARMMParameters(top,par)

        elif representation == 'heavy':

            ff= IMP.atom.get_heavy_atom_CHARMM_parameters()

        elif representation == 'full':

            ff= IMP.atom.get_all_atom_CHARMM_parameters()

        elif representation == 'calpha':

            pass

        else:

            raise NotImplementedError, representation

        if representation == 'calpha':

            print "setting up simplified C alpha force field"

            # set radii

            for ca in IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE):

                IMP.core.XYZR(ca.get_particle()).set_radius(1.89)

                #IMP.atom.Mass(ca.get_particle()).set_mass(1.)

            #create bonds by getting pairs and applying a pairscore

            pairs=[]

            for chain in prot.get_children():

                residues=[(chain.get_child(i),chain.get_child(i+1))

                            for i in xrange(chain.get_number_of_children()-1)]

                residues=[(i.get_child(0).get_particle(),

                           j.get_child(0).get_particle())

                            for (i,j) in residues]

                pairs.extend(residues)

            bonds=[]

            for (i,j) in pairs:

                #check, because it fails if you try twice

                if IMP.atom.Bonded.particle_is_instance(i):

                    bi = IMP.atom.Bonded(i)

                else:

                    bi = IMP.atom.Bonded.setup_particle(i)

                if IMP.atom.Bonded.particle_is_instance(j):

                    bi = IMP.atom.Bonded(i)

                else:

                    bj = IMP.atom.Bonded.setup_particle(j)

                bond=IMP.atom.create_custom_bond(bi,bj,3.78,10) #stiff

                bonds.append(bond)

            bonds_container = IMP.container.ListSingletonContainer(bonds)

            hdps = IMP.core.Harmonic(0,1)

            bs = IMP.atom.BondSingletonScore(hdps)

            br = IMP.container.SingletonsRestraint(bs, bonds_container)

            rsb = IMP.RestraintSet("bonded")

            rsb.add_restraint(br)

            rsb.set_weight(1.0/(kB*ff_temp))

            m.add_restraint(rsb)

            nonbonded_pair_filter = IMP.atom.StereochemistryPairFilter()

            nonbonded_pair_filter.set_bonds(bonds)

            ff=None

        else:

            print "setting up CHARMM forcefield"

            #

            # Using the CHARMM libraries, determine the ideal topology (atoms and their

            # connectivity) for the PDB file's primary sequence

            topology = ff.create_topology(prot)

            # Typically this modifies the C and N termini of each chain in the protein by

            # applying the CHARMM CTER and NTER patches. Patches can also be manually

            # applied at this point, e.g. to add disulfide bridges.

            topology.apply_default_patches()

            #disulfides

            if disulfides:

                s=topology.get_segment(0)

                dis=ff.get_patch('DISU')

                for (i,j) in disulfides:

                    self.find_atom((i, 'SG'), prot)

                    self.find_atom((j, 'SG'), prot)

                    r0=s.get_residue(i)

                    r1=s.get_residue(j)

                    if i==0:

                        r0.set_patched(False)

                    if j==0:

                        r1.set_patched(False)

                    dis.apply(r0,r1)

                    print "added disulfide bridge between cysteines %d and %d" % (i,j)

            # Make the PDB file conform with the topology; i.e. if it contains extra

            # atoms that are not in the CHARMM topology file, remove them; if it is

            # missing atoms (e.g. sidechains, hydrogens) that are in the CHARMM topology,

            # add them and construct their Cartesian coordinates from internal coordinate

            # information.

            topology.setup_hierarchy(prot)

            # Set up and evaluate the stereochemical part (bonds, angles, dihedrals,

            # impropers) of the CHARMM forcefield

            r = IMP.atom.CHARMMStereochemistryRestraint(prot, topology)

            rsb = IMP.RestraintSet("bonded")

            rsb.add_restraint(r)

            m.add_restraint(rsb)

            #

            # Add non-bonded interaction (in this case, Lennard-Jones). This needs to

            # know the radii and well depths for each atom, so add them from the forcefield

            # (they can also be assigned manually using the XYZR or LennardJones

            # decorators):

            ff.add_radii(prot)

            ff.add_well_depths(prot)

            nonbonded_pair_filter = r.get_pair_filter()

        # Get a list of all atoms in the protein, and put it in a container

        atoms = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)

        cont = IMP.container.ListSingletonContainer(atoms)

        # Add a restraint for the Lennard-Jones interaction. This is built from

        # a collection of building blocks. First, a ClosePairContainer maintains a list

        # of all pairs of Particles that are close. Next, all 1-2, 1-3 and 1-4 pairs

        # from the stereochemistry created above are filtered out.

        # Then, a LennardJonesPairScore scores a pair of atoms with the Lennard-Jones

        # potential. Finally, a PairsRestraint is used which simply applies the

        # LennardJonesPairScore to each pair in the ClosePairContainer.

        nbl = IMP.container.ClosePairContainer(cont, 4.0)

        nbl.add_pair_filter(nonbonded_pair_filter)

        #should weight the ff restraint by a temperature, set to 300K.

        pr = IMP.container.PairsRestraint(pairscore, nbl)

        rs = IMP.RestraintSet('phys')

        rs.add_restraint(pr)

        rs.set_weight(1.0/(kB*ff_temp))

        m.add_restraint(rs)

        return prot, ff, rsb, rs


    def init_model_setup_scale(self, default, lower=-1, upper=-1):

        """sets up a Scale particle to the initial default value. It can

        optionnally be constrained between two positive bounds, or else its

        range  is 0 to infinity.

        """

        m=self._m

        scale = IMP.isd.Scale.setup_particle(IMP.Particle(m),default,lower,upper)

        #constrain it also for the optimizers

        if lower != -1 or upper != -1:

            m.add_score_state(IMP.core.SingletonConstraint(

                            IMP.isd.ScaleRangeModifier(),None,scale))

        return scale


    def init_model_jeffreys_kappa(self, scales, prior_rs=None):

        """given a list of scales, returns a RestraintSet('prior') with weight

        1.0 that contains a list of vonMisesKappaJeffreysRestraint on each scale.

        If argument prior_rs is used, add them to that RestraintSet instead.

        """

        if not prior_rs:

            prior_rs = IMP.RestraintSet('prior')

            self._m.add_restraint(prior_rs)

            prior_rs.set_weight(1.0)

        for i in scales:

            prior_rs.add_restraint(IMP.isd.vonMisesKappaJeffreysRestraint(i))

        return prior_rs


    def init_model_jeffreys(self, scales, prior_rs=None):

        """given a list of scales, returns a RestraintSet('prior') with weight

        1.0 that contains a list of JeffreysRestraint on each scale.

        If argument prior_rs is used, add them to that RestraintSet instead.

        """

        if not prior_rs:

            prior_rs = IMP.RestraintSet('prior')

            self._m.add_restraint(prior_rs)

            prior_rs.set_weight(1.0)

        for i in scales:

            prior_rs.add_restraint(IMP.isd.JeffreysRestraint(i))

        return prior_rs


    def init_model_conjugate_kappa(self, scales, c, R, prior_rs=None):

        """given a list of scales, returns a RestraintSet('prior') with weight

        1.0 that contains a list of vonMisesKappaConjugateRestraint on each

        scale. If argument prior_rs is used, add them to that RestraintSet

        instead.

        """

        if not (0 <= R <= c):

            raise ValueError, "parameters R and c should satisfy 0 <= R <= c"

        if not prior_rs:

            prior_rs = IMP.RestraintSet('prior')

            self._m.add_restraint(prior_rs)

            prior_rs.set_weight(1.0)

        for i in scales:

            prior_rs.add_restraint(IMP.isd.vonMisesKappaConjugateRestraint(i, c, R))

        return prior_rs


    def find_atom(self, atom, prot):

        """scans the prot hierarchy and tries to find atom = (resno, name)

        assumes that resno follows the same numbering as the sequence.

        Stores already found atoms for increased speed.

        """

        if not hasattr(self,'__memoized'):

            self.__memoized={prot:{}}

        try:

            return self.__memoized[prot][atom]

        except:

            pass

        try:

            sel=IMP.atom.Selection(hierarchy=prot,

                residue_index=atom[0],

                atom_type=IMP.atom.AtomType(atom[1])

                ).get_selected_particles()

            if len(sel) > 1:

                print "found multiple atoms for atom %d %s!" % atom

                return

            p0=IMP.core.XYZ(sel[0])

        except:

            print "atom %d %s not found" % atom

            return

        self.__memoized[prot][atom] = p0

        return p0


    def init_model_vonMises_restraint_full(self, atoms, data, kappa):

        """

        Sets up a vonMises torsion angle restraint using the given kappa

        particle as concentration parameter. Returns the restraint.

        data is a list of observations.

        """

        return IMP.isd.TALOSRestraint(atoms,data,kappa)


    def init_model_vonMises_restraint_mean(self, prot, atoms, data, kappa):

        """

        Sets up a vonMises torsion angle restraint using the given kappa

        particle as concentration parameter. Returns the restraint.

        data is (mean, standard deviation).

        """

        raise NotImplementedError


    def init_model_NOE_restraint(self, prot, atoms, distance, sigma, gamma):

        """

        Sets up a lognormal distance restraint using the given sigma and gamma.

        Returns the restraint.

        assumes atoms = (atom1, atom2)

        where atom1 is (resno, atomname) and resno is the residue sequence

        number.

        """

        #find corresponding atoms

        p0=self.find_atom(atoms[0], prot)

        p1=self.find_atom(atoms[1], prot)

        #create lognormal restraint using gamma_data = 1

        ln = IMP.isd.NOERestraint(p0,p1,sigma,gamma,distance**(-6))

        return ln


    def init_model_ambiguous_NOE_restraint(self, prot, contributions, distance,

            sigma, gamma):

        """Reads an ambiguous NOE restraint. contributions is a list of

        (atom1, atom2) pairs, where atom1 is (resno, atomname).  Sets up a

        lognormal distance restraint using the given sigma and gamma.

        Returns the restraint.

        """

        #create pairs list

        pairs=[(self.find_atom(i, prot).get_particle(),

                self.find_atom(j, prot).get_particle()) for (i,j) in

                contributions]

        pairs = IMP.container.ListPairContainer(pairs)

        #create restraint

        ln = IMP.isd.AmbiguousNOERestraint(pairs, sigma, gamma, distance**(-6))

        return ln


    def init_model_NOEs(self, prot, seqfile, tblfile, name='NOE', prior_rs=None,

            bounds_sigma=(1.0,0.1,100), bounds_gamma=(1.0,0.1,100),

            verbose=True, sequence_match=(1,1)):

        """read TBL file and store NOE restraints, using one sigma and one gamma

        for the whole dataset. Creates the necessary uninformative priors.

        - prot: protein hierarchy

        - seqfile: a file with 3-letter sequence

        - tblfile: a TBL file with the restraints

        - name: an optional name for the restraintset

        - prior_rs: when not None, add new sigma and gamma to this

                    RestraintSet instance.

        - bounds_sigma or gamma: tuple of (initial value, lower, upper bound)

            bounds can be -1 to set to default range [0,+inf]

        - verbose: be verbose (default True)

        - sequence_match : (noe_start, sequence_start)

        Returns: data_rs, prior_rs, sigma, gamma

        """

        #prior

        if verbose:

            print "Prior for the NOE Scales"

        sigma=self.init_model_setup_scale(*bounds_sigma)

        gamma=self.init_model_setup_scale(*bounds_gamma)

        prior_rs = self.init_model_jeffreys([sigma,gamma], prior_rs)

        #likelihood

        rs = IMP.RestraintSet(name)

        #use the TBLReader to parse the TBL file.

        sequence = IMP.isd.utils.read_sequence_file(seqfile,

                first_residue_number=sequence_match[1])

        tblr = IMP.isd.TBLReader.TBLReader(sequence,

                sequence_match=sequence_match)

        restraints = tblr.read_distances(tblfile, 'NOE')['NOE']

        for i,restraint in enumerate(restraints):

            if verbose and i % 100 == 0:

                print "\r%d" % i,

                sys.stdout.flush()

            #a restraint is (contributions, dist, upper, lower, volume)

            #where contributions is a tuple of contributing pairs

            #and a pair is (c1, c2), where c1 is of the form (resno, atname)

            if len(restraint[0]) > 1:

                ln = self.init_model_ambiguous_NOE_restraint(prot, restraint[0],

                        restraint[1], sigma, gamma)

            else:

                ln = self.init_model_NOE_restraint(prot, restraint[0][0],

                        restraint[1], sigma, gamma)

            rs.add_restraint(ln)

        if verbose:

            print "\r%d NOE restraints read" % i

        #set weight of rs and add to model.

        #Weight is 1.0 cause sigma particle already has this role.

        rs.set_weight(1.0)

        self._m.add_restraint(rs)

        return rs, prior_rs, sigma, gamma


    def init_model_NOEs_marginal(self, prot, seqfile, tblfile, name='NOE',

            verbose=True, sequence_match=(1,1)):

        """read TBL file and store NOE restraints, using the marginal of the

        lognormal with one sigma and one gamma, for the whole dataset.

        - prot: protein hierarchy

        - seqfile: a file with 3-letter sequence

        - tblfile: a TBL file with the restraints

        - name: an optional name for the restraintset

        - verbose: be verbose (default True)

        - sequence_match : (noe_start, sequence_start)

        Returns: data_rs

        """

        #likelihood

        rs = IMP.RestraintSet(name)

        #use the TBLReader to parse the TBL file.

        sequence = IMP.isd.utils.read_sequence_file(seqfile,

                first_residue_number=sequence_match[1])

        tblr = IMP.isd.TBLReader.TBLReader(sequence,

                sequence_match=sequence_match)

        restraints = tblr.read_distances(tblfile, 'NOE')['NOE']

        ln = IMP.isd.MarginalNOERestraint()

        for i,restraint in enumerate(restraints):

            if verbose and i % 100 == 0:

                print "\r%d" % i,

                sys.stdout.flush()

            #a restraint is (contributions, dist, upper, lower, volume)

            #where contributions is a tuple of contributing pairs

            #and a pair is (c1, c2), where c1 is of the form (resno, atname)

            #residue numbers start at 0

            pairs=[(self.find_atom(i, prot).get_particle(),

                    self.find_atom(j, prot).get_particle()) for (i,j) in

                    restraint[0]]

            pairs = IMP.container.ListPairContainer(pairs)

            ln.add_contribution(pairs, restraint[4])

        rs.add_restraint(ln)

        if verbose:

            print "\r%d NOE contributions added" % (len(restraints))

        rs.set_weight(1.0)

        self._m.add_restraint(rs)

        return rs


    def init_model_HBonds_marginal(self, prot, seqfile, tblfile, name='NOE',

            verbose=True):

        """read TBL file and store lognormal restraints, using the marginal of the

        lognormal with one sigma and gamma=1, for the whole dataset.

        - prot: protein hierarchy

        - seqfile: a file with 3-letter sequence

        - tblfile: a TBL file with the restraints

        - name: an optional name for the restraintset

        - verbose: be verbose (default True)

        Returns: data_rs

        """

        #likelihood

        rs = IMP.RestraintSet(name)

        #use the TBLReader to parse the TBL file.

        sequence = IMP.isd.utils.read_sequence_file(seqfile)

        tblr = IMP.isd.TBLReader.TBLReader(sequence)

        restraints = tblr.read_distances(tblfile, 'HBond')['HBond']

        ln = IMP.isd.MarginalHBondRestraint()

        for i,restraint in enumerate(restraints):

            if verbose and i % 100 == 0:

                print "\r%d" % i,

                sys.stdout.flush()

            #a restraint is (contributions, dist, upper, lower, volume)

            #where contributions is a tuple of contributing pairs

            #and a pair is (c1, c2), where c1 is of the form (resno, atname)

            #residue numbers start at 0

            pairs=[(self.find_atom(i, prot).get_particle(),

                    self.find_atom(j, prot).get_particle()) for (i,j) in

                    restraint[0]]

            pairs = IMP.container.ListPairContainer(pairs)

            ln.add_contribution(pairs, restraint[4])

        rs.add_restraint(ln)

        if verbose:

            print "\r%d Hbond contributions added" % (len(restraints))

        rs.set_weight(1.0)

        self._m.add_restraint(rs)

        return rs


    def init_model_TALOS(self, prot, seqfile, talos_data, fulldata=True,

            sequence_match=(1,1),name='TALOS', prior_rs=None,

            bounds_kappa=(1.0, 0.1,10), verbose=True, prior='jeffreys',

            keep_all=False):

        """read TALOS dihedral angle data, and create restraints for phi/psi

        torsion angles, along with the prior for kappa, which is a scale for the

        whole dataset, compare to 1/sigma**2 in the NOE case.

        - prot: protein hierarchy

        - seqfile: a file with 3-letter sequence

        - talos_data: either a file (pred.tab), or a folder (pred/) in which all

                      files in the form res???.tab can be found. If possible,

                      try to provide the folder, as statistics are more

                      accurate in that case.

        - fulldata : either True or False, whether the data is the full TALOS

                     output (predAll.tab or pred/ folder), or just the averages

                     (pred.tab)

        - sequence_match : (talos_no, sequence_no) to adjust for different

                           residue numberings

        - name: an optional name for the restraintset

        - prior_rs: when not None, add new kappa(s) to this RestraintSet instance.

        - bounds_kappa: tuple of (initial value, lower, upper bound)

                        bounds can be -1 to set to default range [0,+inf]

        - verbose: be verbose (default True)

        - prior: either 'jeffreys' or a tuple (R,c), which signifies to use the

                 conjugate prior of the von Mises restraint, with parameters R

                 and c.  Good values are R=0 and c=10. Default: jeffreys prior.

        - keep_all: in case of a folder for 'talos_data', whether to keep

                    candidates marked as 'outliers' by TALOS, or to include them.

        Returns: data_rs, prior_rs, kappa


        """

        #prior

        if verbose:

            print "Prior for von Mises Kappa"

        kappa=self.init_model_setup_scale(*bounds_kappa)

        prior_rs = self.init_model_jeffreys_kappa([kappa], prior_rs)

        #likelihood

        if verbose:

            print "reading data"

        rs=IMP.RestraintSet(name)

        sequence= IMP.isd.utils.read_sequence_file(seqfile,

                first_residue_no=sequence_match[1])

        if fulldata:

            talosr=IMP.isd.TALOSReader.TALOSReader(sequence, True, keep_all,

                    sequence_match=sequence_match)

            if os.path.isdir(talos_data):

                #using pred/res???.tab files

                for i,res in enumerate(glob(os.path.join(talos_data,'res???.tab'))):

                    if verbose and i % 100:

                        print "\r%d" % i,

                        sys.stdout.flush()

                    talosr.read(res)

            else:

                #using predAll.tab file

                talosr.read(talos_data)

        else:

            #using pred.tab file and correcting for estimates

            talosr=IMP.isd.TALOSReader.TALOSReader(sequence, False, keep_all,

                    sequence_match=sequence_match)

            talosr.read(talos_data)

        #get harvested data and create restraints

        data = talosr.get_data()

        if verbose:

            print "\rread dihedral data for %d residues" % len(data)

            print "creating restraints"

        avgR=[]

        for resno,datum in data.iteritems():

            phidata=datum['phi']

            psidata=datum['psi']

            num=datum['num']

            res=IMP.atom.Residue(IMP.atom.get_residue(prot, resno))

            phi=IMP.atom.get_phi_dihedral_atoms(res)

            psi=IMP.atom.get_psi_dihedral_atoms(res)

            if fulldata:

                r=self.init_model_vonMises_restraint_full(phi, phidata, kappa)

                rs.add_restraint(r)

                if verbose:

                    avgR.append(r.get_R0())

                r=self.init_model_vonMises_restraint_full(psi, psidata, kappa)

                rs.add_restraint(r)

                if verbose:

                    avgR.append(r.get_R0())

            else:

                r=self.init_model_vonMises_restraint_mean(phi, phidata, kappa)

                rs.add_restraint(r)

                if verbose:

                    avgR.append(r.get_R0())

                r=self.init_model_vonMises_restraint_mean(psi, psidata, kappa)

                rs.add_restraint(r)

                if verbose:

                    avgR.append(r.get_R0())

        if verbose:

            print "%s Restraints created. Average R0: %f" % \

                (len(avgR), sum(avgR)/float(len(avgR)))

        rs.set_weight(1.0)

        self._m.add_restraint(rs)

        return rs, prior_rs, kappa


    def init_model_standard_SAXS_restraint(self, prot, profilefile, name='SAXS',

            ff_type=IMP.saxs.HEAVY_ATOMS):

        """read experimental SAXS profile and apply restraint the standard

        way (like foxs)

        Returns: a restraintset and the experimental profile

        """

        rs = IMP.RestraintSet(name)

        saxs_profile = IMP.saxs.Profile(profilefile)

        particles = IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)

        saxs_restraint = IMP.saxs.Restraint(particles, saxs_profile, ff_type)

        rs.add_restraint(saxs_restraint)

        rs.set_weight(1.0)

        self._m.add_restraint(rs)

        return rs, saxs_profile


    def _setup_md(self,prot, temperature=300.0, thermostat='berendsen',

            coupling=500, md_restraints=None, timestep=1.0, recenter=1000,

            momentum=1000):

        """setup molecular dynamics

        - temperature: target temperature

        - thermostat: one of 'NVE', rescale_velocities',

                                'berendsen', 'langevin'

        - coupling: coupling constant (tau (fs) for berendsen,

                                        gamma (/fs) for langevin)

        - md_restraints: if not None, specify the terms of the energy to be used

                            during the md steps via a list of restraints.

        - timestep: in femtoseconds.

        - recenter: recenter the molecule every so many steps (Langevin only)

        - momentum: remove angular momentum every so many steps (Berendsen only)

        Returns: an instance of md and an instance of an OptimizerState (the

        thermostat), or None if NVE.

        """

        ## Molecular Dynamics (from MAX BONOMI)

        md=IMP.atom.MolecularDynamics()

        md.set_model(self._m)

        md.assign_velocities(temperature)

        md.set_time_step(timestep)

        if thermostat == 'NVE':

            os = None

        elif thermostat == 'rescale_velocities':

            os=IMP.atom.VelocityScalingOptimizerState(

                    IMP.atom.get_leaves(prot), temperature, 0)

            md.add_optimizer_state(os)

        elif thermostat == 'berendsen':

            os=IMP.atom.BerendsenThermostatOptimizerState(

                    IMP.atom.get_leaves(prot), temperature, coupling)

            md.add_optimizer_state(os)

            mom = IMP.atom.RemoveRigidMotionOptimizerState(

                    IMP.atom.get_leaves(prot), momentum)

            md.add_optimizer_state(mom)

        elif thermostat == 'langevin':

            os=IMP.atom.LangevinThermostatOptimizerState(

                    IMP.atom.get_leaves(prot), temperature, coupling)

            md.add_optimizer_state(os)

            #cen = IMP.atom.RemoveTranslationOptimizerState(

            #        IMP.atom.get_leaves(prot), recenter)

            #md.add_optimizer_state(cen)

        else:

            raise NotImplementedError, thermostat


        if md_restraints:

            md.set_restraints(md_restraints)


        return md, os


    def _setup_normal_mover(self, particle, floatkey, stepsize):

        """setup NormalMover to move particle's floatkey attribute

        by a gaussian with standard deviation 'stepsize'

        Returns: mover instance.

        """

        cont=IMP.container.ListSingletonContainer(self._m)

        cont.add_particle(particle)

        nm=IMP.core.NormalMover(cont,

                IMP.FloatKeys([floatkey]),stepsize)

        return nm


    def _setup_md_mover(self, md, particles, temperature, n_md_steps=10):

        """setup MDMover using md and particles.

        - md: md instance

        - particles: particles to move, usually the leaves of the protein

          hierarchy

        - n_md_steps: number of md steps to perform on each move

        Returns: mover instance.

        """

        cont=IMP.container.ListSingletonContainer(self._m)

        cont.add_particles(particles)

        return IMP.atom.MDMover(cont, md, temperature, n_md_steps)


    def _setup_mc(self, mover, temperature=300.0, mc_restraints=None):

        """setup monte carlo using a certain mover.

        - mover: mover to use, NormalMover or MDMover usually.

        - temperature: target temperature.

        - mc_restraints: if not None, list of restraints for the metropolis

          criterion.

        Returns: mc instance.

        """

        mc = IMP.core.MonteCarlo(self._m)

        #why is this returning an int?

        mc.add_mover(mover)

        #set same temp as MD, careful with units

        mc.set_kt(kB*temperature)

        #allow to go uphill

        mc.set_return_best(False)

        #update all particles each time

        mc.set_move_probability(1.0)

        if mc_restraints:

            #tell mc to only use restraints involving the particle

            mc.set_restraints(mc_restraints)

        return mc


    def init_simulation_setup_protein_hmc_hopper(self, prot, temperature=300.0,

            gamma=0.01, n_md_steps=10,

            md_restraints=None, mc_restraints=None, timestep=1.0,

            sd_threshold=0.0, sd_stepsize=0.01, sd_maxsteps=100):

        """setup hybrid monte-carlo on protein. Uses basin hopping with steepest

        descent minimization.

        - prot: protein hierarchy.

        - temperature: target temperature.

        - gamma: coupling constant for langevin (/fs)

        - n_md_steps: number of md steps per mc step

        - md_restraints: if not None, specify the terms of the energy to be used

                            during the md steps.

        - mc_restraints: if not None, use these energy terms for the metropolis

                            criterion.

        - timestep: time step for md, in femtoseconds.

        - sd_threshold: stop steepest descent after energy difference drops

                below that threshold

        - sd_stepsize: stepsize to use for the steepest descent, in angstrom.

        - sd_maxsteps: maximum number of steps for steepest descent

        Returns: hmc, mdmover, md and OptimizerState (thermostat)

        """

        raise NotImplementedError

        md, os = self._setup_md(prot, temperature=temperature,

                thermostat='langevin', coupling=gamma,

                md_restraints=md_restraints, timestep=timestep)

        particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)

        mdmover = self._setup_md_mover(md, particles, temperature, n_md_steps)

        hmc = self._setup_mc(mdmover, temperature, mc_restraints)

        sd=IMP.core.SteepestDescent(self._m)

        sd.set_threshold(sd_threshold)

        sd.set_step_size(sd_stepsize)

        hmc.set_local_optimizer(sd)

        hmc.set_local_steps(sd_maxsteps)

        hmc.set_use_basin_hopping(True)

        return hmc, mdmover, md, os


    def init_simulation_setup_protein_hmc_nve(self, prot, temperature=300.0,

            n_md_steps=100, md_restraints=None, mc_restraints=None,

            timestep=1.0):

        """setup hybrid monte-carlo on protein. Uses NVE MD and tries the full

        - prot: protein hierarchy.

        - temperature: target temperature.

        - coupling: coupling constant (tau (fs) for berendsen,

                                        gamma (/fs) for langevin)

        - n_md_steps: number of md steps per mc step

        - md_restraints: if not None, specify the terms of the energy to be used

                            during the md steps.

        - mc_restraints: if not None, use these energy terms for the metropolis

                            criterion.

        - timestep: time step for md, in femtoseconds.

        - sd_threshold: stop steepest descent after energy difference drops

                below that threshold

        - sd_stepsize: stepsize to use for the steepest descent, in angstrom.

        - sd_maxsteps: maximum number of steps for steepest descent

        Returns: hmc, mdmover and md

        """

        prot = self._p['prot']

        md, os = self._setup_md(prot, temperature=temperature,

                thermostat='NVE', coupling=None, timestep=1.0,

                md_restraints=md_restraints)

        particles=IMP.atom.get_by_type(prot, IMP.atom.ATOM_TYPE)

        cont=IMP.container.ListSingletonContainer(self._m)

        cont.add_particles(particles)

        mdmover = PyMDMover(cont, md, n_md_steps)

        mdmover.m=self._m

        #mdmover = IMP.atom.MDMover(cont, md, temperature, n_md_steps)

        mc = PyMC(self._m)

        #mc = IMP.core.MonteCarlo(self._m)

        #why is this returning an int?

        mc.add_mover(mdmover)

        #set same temp as MD, careful with units

        mc.set_kt(kB*temperature)

        #allow to go uphill

        mc.set_return_best(False)

        #update all particles each time

        mc.set_move_probability(1.0)

        return mc, mdmover, md


    def init_simulation_setup_scale_mc(self, scale, temperature=300.0,

            mc_restraints=None, floatkey=IMP.FloatKey("scale"), nm_stepsize=0.1):

        """sets up monte carlo on scale, at a certain target temperature,

        optionnally using a certain set of restraints only.

        - scale: scale particle

        - temperature: target temperature

        - mc_restraints: optional set of restraints from which the energy should

          be drawn instead of the energy of the complete system.

        - floatkey: the floatkey to move.

        - nm_stepsize: the stepsize of the normal mover

        Returns: mc instance, nm instance.

        """

        nm = self._setup_normal_mover(scale, floatkey, nm_stepsize)

        mc = self._setup_mc(nm, temperature, mc_restraints)

        return mc, nm


    def _mc_and_update_nm(self, nsteps, mc, nm, stats_key,

            adjust_stepsize=True):

        """run mc using a normal mover on a single particle,

        update stepsize and store nsteps, acceptance and stepsize in the

        statistics instance self.stat by using the key stats_key.

        """

        #do the monte carlo

        mc.optimize(nsteps)

        naccept = mc.get_number_of_forward_steps()

        #increment the counter for this simulation

        self.stat.increment_counter(stats_key, nsteps)

        #get the acceptance rate, stepsize

        accept = float(naccept)/nsteps

        self.stat.update(stats_key, 'acceptance', accept)

        stepsize = nm.get_sigma()

        self.stat.update(stats_key, 'stepsize', stepsize)

        #update stepsize if needed and requested

        if adjust_stepsize:

            if 0.4 < accept < 0.6:

                return

            if accept < 0.05:

                accept = 0.05

            if accept > 1.0:

                accept = 1.0

            nm.set_sigma(stepsize*2*accept)


    def _hmc_and_update_md(self, nsteps, hmc, mv, stats_key,

            adjust_stepsize=True):

        """run hmc, update stepsize and print statistics. Updates number of MD

        steps to reach a target acceptance between 0.4 and 0.6, sends

        statistics to self.stat. MD steps are always at least 10 and at most 500.

        """

        hmc.optimize(nsteps)

        naccept = hmc.get_number_of_forward_steps()

        self.stat.increment_counter(stats_key, nsteps)

        accept = float(naccept)/nsteps

        self.stat.update(stats_key, 'acceptance', accept)

        mdsteps = mv.get_number_of_steps()

        self.stat.update(stats_key, 'n_md_steps', mdsteps)

        if adjust_stepsize:

            if 0.4 < accept < 0.6:

                return

            mdsteps = int(mdsteps *2**(accept-0.5))

            if mdsteps > 500:

                mdsteps = 500

            if mdsteps < 10:

                mdsteps = 10

            mv.set_nsteps(mdsteps)


    def get_pdb(self, prot):

        """returns a string corresponding to the pdb structure of hierarchy

        prot.

        """

        output = StringIO()

        IMP.atom.write_pdb(prot, output)

        return output.getvalue()


    def get_netcdf(self, prot):

        raise NotImplementedError


    def do_md_protein_statistics(self, md_key, nsteps, md_instance,

            temperature=300.0, prot_coordinates=None):

        """updates statistics for md simulation: target temp, kinetic energy,

        kinetic temperature, writes coordinates and increments counter.

        - md_key: stats md key

        - nsteps: number of steps performed.

        - md_instance: instance of the MolecularDynamics class.

        - temperature: target temperature

        - prot_coordinates: protein coordinates to be passed to the stats class,

          (should be a string)

        """

        self.stat.update(md_key, 'target_temp', temperature)

        kinetic = md_instance.get_kinetic_energy()

        self.stat.update(md_key, 'E_kinetic', kinetic)

        self.stat.update(md_key, 'instant_temp',

                         md_instance.get_kinetic_temperature(kinetic))

        self.stat.update_coordinates(md_key, 'protein', prot_coordinates)

        self.stat.increment_counter(md_key, nsteps)


    def init_stats_add_mc_category(self, stat, name='mc', coord='particle'):

        """shortcut for a frequent series of operations on MC simulations'

        statistics. Creates an entry for acceptance, stepsize and one

        coordinate set printed in the statistics file.

        """

        #create category

        mc_key = stat.add_category(name=name)

        #giving None as argument is a way to create a static entry.

        stat.add_entry(mc_key, entry=Entry('temperature', '%10G', None))

        stat.add_entry(mc_key, entry=Entry('acceptance', '%10G', None))

        stat.add_entry(mc_key, entry=Entry('stepsize', '%10G', None))

        #special call to add coordinates to be dumped

        stat.add_entry(mc_key, entry=Entry(coord, '%10G', None))

        #add the counter to the output

        stat.add_entry(mc_key, name='counter')

        return mc_key


    def init_stats_add_md_category(self, stat, name='md', coord='protein'):

        """shortcut for a frequent series of operations on MD simulations'

        statistics. Creates an entry for target temp, instantaneous temp,

        kinetic energy, and one set of coordinates called 'protein' by

        default.

        """

        #create category

        md_key = stat.add_category(name=name)

        #giving None as argument is a way to create a static entry.

        stat.add_entry(md_key, entry=Entry('target_temp', '%10G', None))

        stat.add_entry(md_key, entry=Entry('instant_temp', '%10G', None))

        stat.add_entry(md_key, entry=Entry('E_kinetic', '%10G', None))

        #special call to add coordinates to be dumped

        stat.add_coordinates(md_key, coord)

        #add the counter to the output

        stat.add_entry(md_key, name='counter')

        return md_key


    def init_stats_add_hmc_category(self, stat, name='hmc', coord='protein'):

        """shortcut for a frequent series of operations on HMC simulations'

        statistics. Adds acceptance, number of MD steps and a trajectory for

        a protein.

        """

        #create category

        hmc_key = stat.add_category(name=name)

        #giving None as argument is a way to create a static entry.

        stat.add_entry(hmc_key, entry=Entry('temperature', '%10G', None))

        stat.add_entry(hmc_key, entry=Entry('acceptance', '%10G', None))

        stat.add_entry(hmc_key, entry=Entry('n_md_steps', '%10G', None))

        stat.add_entry(hmc_key, entry=Entry('E_kinetic', '%10G', None))

        #special call to add coordinates to be dumped

        stat.add_coordinates(hmc_key, coord)

        #add the counter to the output

        stat.add_entry(hmc_key, name='counter')

        return hmc_key


    def rescale_velocities(self, particles, factor):

        """rescale the velocities of a bunch of particles having vx vy and vz

        floatkeys

        """

        keys=[IMP.FloatKey("vx"), IMP.FloatKey("vy"), IMP.FloatKey("vz")]

        for p in particles:

            for k in keys:

                p.set_value(k, p.get_value(k)*factor)