4 This module provides a few methods to improve the efficiency of a replica-exchange simulation
5 by tuning its parameters.
14 from numpy.random
import randint
15 import rpy2.robjects
as robjects
17 kB = 1.3806503 * 6.0221415 / 4184.0
31 robjects.globalenv[
"kB"] = kB
32 _rinverf = r(
'invErf <- function(x) {qnorm((1 + x) /2) / sqrt(2)}')
33 _rerf = r(
'erf <- function(x) {2 * pnorm(x * sqrt(2)) - 1}')
47 return _rinvF(x, d1, d2)[0]
50 def spline(xy, mean, method=None):
51 """spline interpolation of (x,y) coordinates. If interpolation goes
52 negative, replace by mean value.
55 robjects.globalenv[
"x"] = robjects.FloatVector(x)
56 robjects.globalenv[
"y"] = robjects.FloatVector(y)
60 r(
'cvsplinenonbounded <- splinefun(x,y)')
62 r(
'cvsplinenonbounded <- splinefun(x,y,method="%s")' % method)
64 'cvspline <- function(x) { tmp = cvsplinenonbounded(x); if (tmp>0) {tmp} else {%f}}' %
73 def linear_interpolation(xy, mean):
74 """linear interpolation of (x,y) coordinates. No extrapolation possible.
77 robjects.globalenv[
"x"] = robjects.FloatVector(x)
78 robjects.globalenv[
"y"] = robjects.FloatVector(y)
81 _rinterp = r(
'cvspline <- approxfun(x,y)')
92 """perform anova using R and return statistic, p-value, between and within variance"""
96 reps = r.rep(0, len(args[0]))
97 weight = robjects.FloatVector(args[0])
98 for i
in xrange(1, len(args)):
99 reps += r.rep(i, len(args[i]))
100 weight += robjects.FloatVector(args[i])
101 group = r.factor(reps)
102 robjects.globalenv[
"weight"] = weight
103 robjects.globalenv[
"group"] = group
104 lm = r.lm(
"weight ~ group")
108 anova_result = {
'fstat': aov[3][0],
110 'between': aov[2][0],
112 'nsteps': [len(i)
for i
in args],
119 """perform kruskal-wallis rank test"""
123 reps = r.rep(0, len(args[0]))
124 weight = robjects.FloatVector(args[0])
125 for i
in xrange(1, len(args)):
126 reps += r.rep(i, len(args[i]))
127 weight += robjects.FloatVector(args[i])
128 group = r.factor(reps)
129 aov = r(
'kruskal.test')(group, weight)
131 kruskal_result = {
'fstat': aov[0][0],
133 'nsteps': [len(i)
for i
in args],
136 return kruskal_result
139 def ttest(obs, target):
140 """perform a one-sample two-sided t-test on obs against target mean"""
141 test = r(
't.test')(robjects.IntVector(obs), mu=target)
142 return test[0][0], test[2][0]
145 def binom(obs, target):
146 """perform an exact binomial test on the mean of obs against target"""
149 test = r(
'binom.test')(success, trials, p=target)
150 return test[0][0], test[2][0]
154 """perform bartlett's test on the equality of variances of the observations"""
157 group = r.gl(ngroups, nreps)
158 weight = robjects.IntVector(args[0])
160 weight += robjects.IntVector(i)
161 robjects.globalenv[
"weight"] = weight
162 robjects.globalenv[
"group"] = group
163 var = r(
'bartlett.test')(weight, group)
164 return var[0][0], var[2][0]
168 """perform Fligner-Killeen non-parametric test of the variance equality"""
171 group = r.gl(ngroups, nreps)
172 weight = robjects.IntVector(args[0])
174 weight += robjects.IntVector(i)
175 robjects.globalenv[
"weight"] = weight
176 robjects.globalenv[
"group"] = group
177 var = r(
'fligner.test')(weight, group)
178 return var[0][0], var[2][0]
181 def power_test(ar, power=0.8, alpha=0.05):
182 """perform an anova power test and return
183 - the power of the test with this input data
184 - the number of points that would be needed to achieve a default power of 0.8
185 ar: the ouput of anova()
187 result = r(
'power.anova.test')(groups=ar[
'nreps'], n=min(ar[
'nsteps']),
188 between=ar[
'between'], within=ar[
'within'], sig=alpha)
189 prdb(
'the power of this anova was: %.3f' % result[5][0])
190 result = r(
'power.anova.test')(groups=ar[
'nreps'],
191 between=ar[
'between'], within=ar[
'within'], sig=alpha, pow=power)
193 'To have a power of %.3f, there should be at least %d exchange attemps.' %
194 (power, result[1][0]))
198 def minimum_n(ar, alpha=0.05):
199 """This routine tries to return an estimate of the additional number of exchange trials that
200 could lead to a positive result of the anova (e.g. the average ARs are not the same). It is
201 still very crude. It also assumes that a one-way anova was made.
202 ar: the output of anova()
206 nsteps = ar[
'nsteps']
214 (sqrt(Finv(1 - alpha, nreps - 1, nreps * (nsteps - 1)) / fstat) - 1)
221 """When created, estimates the heat capacity from the energies or from the
222 indicator functions using the specified method. Two methods are then
223 available to guess the heat capacity at a given parameter value: get()
224 returns a single point, and mean() returns the linear average between two
228 def __init__(self, params, energies=None, indicators=None,
229 method=
"constant", temps=
None, write_cv=
False):
231 kB = 1.3806503 * 6.0221415 / 4184.0
232 self.__initialized =
False
236 if method ==
"interpolate":
240 elif method ==
"constant":
242 self.get =
lambda p: self.__cv
243 self.mean =
lambda p1, p2: self.__cv
244 self.__cvfun = self.get
245 r(
'cvspline <- function(x) {%f}' % self.__cv)
246 elif method ==
"mbar":
249 self.mean = self.mean_mbar
251 raise NotImplementedError(method)
253 self.__initialized =
True
258 fl.write(
"".join([
"%f %f\n" % (x, self.get(x))
259 for x
in linspace(params[0] / 2, 2 * params[-1])]))
263 """interpolate using previous values, by reversing the approximate overlap
266 if self.__initialized:
268 if len(indicators) != len(params) - 1:
270 "the length of indicators and params does not match!")
271 if params != tuple(sorted(params)):
272 raise NotImplementedError(
273 "unable to work on parameters that do not change monotonically")
275 prdb(
"storing params and means")
276 self.__params = params
277 self.__pmeans = [(params[i] + params[i + 1]) / 2.
278 for i
in xrange(len(params) - 1)]
279 prdb(
"computing __cv")
280 for i, ind
in enumerate(indicators):
281 mean = sum(ind) / float(len(ind))
282 Y2 = 2 * kB * erfinv(1 - mean) ** 2
286 (self.__pmeans[i], (p1 ** 2 + p2 ** 2) * float(Y2) / (p2 - p1) ** 2))
289 self.__cvmean = sum([i[1]
for i
in self.__cv]) / float(len(self.__cv))
290 if self.__cvmean < 0:
291 raise ValueError(
"Cv mean is negative!")
292 self.__cvfun = spline(self.__cv, self.__cvmean)
296 """try to guess which constant cv fits best"""
297 if self.__initialized:
300 self.__cv = self.__cvmean
303 def needs_init(self):
304 if not self.__initialized:
305 raise RuntimeError(
"Class was not initialized correctly!")
308 "use MBAR to get the heat capacity"
309 raise NotImplementedError(method)
310 if self.__initialized:
313 def _isinbounds(self, p, params):
314 """returns True if p is within parms, else false. the params list must be sorted ascendingly."""
315 if p < params[0] - EPSILON
or p > params[-1] + EPSILON:
321 def _interpolate(self, xval, xlist):
322 """return interpolation of Cv at point xval, and return the average instead
323 if this value is negative.
325 self._isinbounds(xval, xlist)
326 val = self.__cvfun(xval)
333 """returns the point estimate of the first derivative of the energy with
334 respect to the replica exchange parameter (usually T or q).
335 This version assumes that the means of cv are given.
338 return self._interpolate(param, self.__pmeans)
341 """returns the point estimate of the first derivative of the energy with
342 respect to the replica exchange parameter (usually T or q).
343 This version assumes that the values of cv are given.
346 return self._interpolate(param, self.__params)
349 """estimate the mean of Cv between two points. Here the means were
353 return self._interpolate((pa + pb) / 2., self.__pmeans)
355 def mean_mbar(self, pa, pb):
362 def update_good_dumb(newp, oldp, *args, **kwargs):
363 """Here the old parameters are oldp[0] and oldp[1], and the starting point
364 is newp[0]. We should modify newp[1] so that the AR in the following cycle
365 is equal to the targetAR.
366 In the "dumb" method, the Cv and targetAR keywords are ignored.
367 Here the newp[1] parameter is modified because prior changes have set
368 newp[0] to a different value than oldp[0]. Thus, we should move newp[1] by
369 minimizing the effect on the AR since it is supposedly equal to targetAR. In
370 this simple method, the parameter is just translated.
373 "newp[0] has moved (%.3f -> %.3f), adjusting the position of newp[1]" %
375 return oldp[1] - (oldp[0] - newp[0])
378 def update_bad_dumb(newp, oldp, ind, targetAR=0.4, scale=0.1, **kwargs):
379 """Here the old parameters are oldp[0] and oldp[1], and the starting point
380 is newp[0]. We should modify newp[1] so that the AR in the following cycle
381 is equal to the targetAR.
382 In the "dumb" method, the Cv keyword is ignored. Here the newp[1]
383 parameter is modified to follow the possible translation of newp[0] by
384 calling update_good_dumb, and then newp[1] is added or substracted scale% of
385 (oldp[1] - oldp[0]) to adjust to targetAR.
388 if newp[0] != oldp[0]:
389 newp[1] = update_good_dumb(newp, oldp)
390 if targetAR > sum(ind) / float(len(ind)):
391 prdb(
"""target AR is higher than expected, decreasing newp[1]""")
392 newp[1] -= scale * (oldp[1] - oldp[0])
394 prdb(
"""target AR is lower than expected, increasing newp[1]""")
395 newp[1] += scale * (oldp[1] - oldp[0])
399 def update_any_cv_step(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
400 """here we use the average AR formula of two gaussians to get newp[1] as a
401 function of newp[1], knowing the targetAR and estimating the Cv. If targetAR
402 is negative, consider that mean(ind) equals the target AR and skip any
403 calculation in the case that oldp[0] equals newp[0].
404 step: suppose the heat capacity is stepwise constant, i.e. use the heat
405 capacity at position newp[0] as an estimate of the mean of the heat capacity
406 between newp[0] and newp[1]. This does not require any self-consistent loop.
411 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
414 targetAR = sum(ind) / float(len(ind))
416 Y = sqrt(2 * kB) * float(erfinv(1 - targetAR))
418 raise ValueError(
"""targetAR too small for this approximate method, use
419 the full self-consistent method instead.""")
420 return newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
423 def update_any_cv_sc(newp, oldp, ind, targetAR=0.4, Cv=None,
424 tol=1e-6, maxiter=10000):
425 """self-consistent solver version"""
429 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
432 targetAR = sum(ind) / float(len(ind))
434 Y = sqrt(2 * kB) * float(erfinv(1 - targetAR))
436 raise ValueError(
"""targetAR too small for this approximate method, use
437 the full self-consistent method instead.""")
438 targetp = newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
439 for i
in xrange(maxiter):
440 cv = Cv.mean(newp[0], targetp)
441 (oldtargetp, targetp) = (
442 targetp, newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2))
443 if abs(targetp - oldtargetp) <= tol:
447 raise ValueError(
"""targetAR too small for this approximate method, use
448 the full self-consistent method instead.""")
450 raise ValueError(
"""something unexpected happened""")
452 prdb(
"""Warning: unable to converge the self-consistent after %d
453 iterations and a tolerance of %f. Change the method or decrease the
454 tolerance!""" % (maxiter, tol))
455 prdb(
"converged after %d iterations and a tolerance of %f for x=%f" %
460 def update_any_cv_scfull(newp, oldp, ind, targetAR=0.4, Cv=None,
461 tol=1e-6, maxiter=10000):
462 """self-consistent solver version, on the exact average AR equation"""
467 'u21 <- function(t1,t2) { integrate(Vectorize(cvspline),t1,t2)$value }')
468 _rb21 = r(
'b21 <- function(t1,t2) { 1./(kB*t2) - 1./(kB*t1) }')
469 _rsigma2 = r(
'sigma2 <- function(t) {cvspline(t)*kB*t**2}')
470 _rovboltz = r(
'ovboltz <- function(t1,t2) {\
472 u21(t1,t2)/sqrt(2*(sigma2(t1)+sigma2(t2))))\
473 + exp(b21(t1,t2)*(u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2))/2))\
474 * (1+erf((u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2)))/(sqrt(2*(sigma2(t1)+sigma2(t2))))))\
477 'rootfn <- function(t2) {ovboltz(%f,t2)-%f}' %
481 if oldp[1] > oldp[0]:
486 while _rrootfn(tmp)[0] >= 0:
488 tmp += (oldp[1] - oldp[0])
490 raise RuntimeError(
"heat capacity goes negative")
492 raise RuntimeError(
'could not find zero of function!')
496 'uniroot(rootfn,c(%f,%f),f.lower = %f, f.upper = %f, tol = %f, maxiter = %d)' % (newp[0], tmp,
497 1 - targetAR, -targetAR, tol, maxiter))
499 "self-consistent solver converged after %s iterations and an estimated precision of %s " %
500 (_runiroot[2][0], _runiroot[3][0]))
506 _runiroot[0][0])[0]])
507 return _runiroot[0][0]
510 def update_any_cv_nr(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
511 """newton-raphson solver version"""
514 raise NotImplementedError
519 def are_equal_to_targetAR(
524 """here, all indicators have same average, we want to know if it is equal to
529 deviations = sorted([(abs(sum(ind) / float(len(ind)) - targetAR), ind)
530 for pos, ind
in enumerate(indicators)])
531 deviant = deviations[-1]
534 if method ==
"ttest":
537 elif method ==
"binom":
540 raise NotImplementedError
543 test, pval = ttest(deviant[1], targetAR)
545 if abs(targetAR - sum(deviant[1]) / len(deviant[1])) > EPSILON:
555 def are_stationnary(indicators, alpha=0.05, method="anova"):
556 """test on the stationarity of the observations (block analysis). Done so by
557 launching an anova on the difference between the two halves of each observations.
560 if method ==
"kruskal":
565 tmp = array(indicators)
566 blocklen = len(indicators[0]) / 2
567 block = tmp[:, :blocklen] - tmp[:, blocklen:2 * blocklen]
568 if test(*block)[
'pval'] < alpha:
574 def are_equal(indicators, targetAR=0.4, alpha=0.05,
575 method=
"anova", varMethod=
"skip", power=0.8):
576 """Perform a one-way ANOVA or kruskal-wallis test on the indicators set,
577 and return True if one cannot exclude with risk alpha that the indicators
578 AR are different (i.e. True = all means are equal). Also performs a test
579 on the variance (they must be equal).
582 if min(targetAR, 1 - targetAR) * len(indicators[0]) <= 5
and \
583 (varMethod ==
"bartlett" or method ==
"anova"):
584 prdb(
"Warning: normal approximation to the binomial does not hold!")
587 if varMethod ==
"skip":
590 if varMethod ==
"bartlett":
591 pval = bartlett(*indicators)[1]
592 elif varMethod ==
"fligner":
593 pval = killeen(*indicators)[1]
595 raise NotImplementedError(
596 "variance testing method unknown: %s" %
599 prdb(
"Warning: performing mean test with unequal variances.")
601 if method ==
"kruskal":
606 tr = test(*indicators)
609 tr[
'result'] = tr[
'pval'] >= alpha
621 def find_good_ARs(indicators, targetAR=0.4, alpha=0.05, method="binom"):
622 """perform one-sample t-tests on each of the data sets, and return
623 a tuple of bool of size N-1, False if AR i is not equal to targetAR
628 means = sorted([(sum(ind) / float(len(ind)), pos, ind)
629 for pos, ind
in enumerate(indicators)])
632 if method ==
"ttest":
635 elif method ==
"binom":
638 raise NotImplementedError
642 prdb(
"starting left")
643 for (i, (mean, pos, ind))
in enumerate(means):
645 "performing t-test on couple %d having average AR %f, position %d" %
648 test, pval = ttest(ind, targetAR)
650 if abs(targetAR - mean) > EPSILON:
656 isGoodTuple.append((pos,
False))
661 prdb(
"starting right")
662 for (i, (mean, pos, ind))
in enumerate(reversed(means)):
663 prdb(
"performing t-test on couple %d having average AR %f, position %d"
664 % (pos, mean, len(means) - 1 - i))
665 if ttest(ind, targetAR)[1] < alpha:
667 isGoodTuple.append((pos,
False))
669 goodstop = len(means) - 1 - i
673 if len(isGoodTuple) > len(indicators):
674 return tuple([
False] * len(indicators))
676 elif len(isGoodTuple) == 0:
677 return tuple([
True] * len(indicators))
680 isGoodTuple.extend([(means[i][1],
True)
for i
in
681 xrange(goodstart, goodstop + 1)])
683 return tuple([tup[1]
for tup
in isGoodTuple])
688 def mean_first_passage_times(
693 """compute mean first passage times as suggested in
694 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
696 use_avgAR : if a list of average ARs is given computes everything from
697 average AR; if it is False, compute by direct counting of events.
700 If use_avgAR == False:
701 tau0, tauN, chose_N, times0, timesN
703 tau0, tauN, None, None, None
704 tau0[i]: average time to go from replica 0 to replica i
705 tauN[i]: average time to go from replica N to replica i
706 times0 and timesN are the corresponding lists of single events.
710 from numpy
import array, zeros
711 from pprint
import pprint, pformat
712 replicanums = array(replicanums_ori)[:, start::subs]
720 for state
in xrange(1, N):
721 tau0[state] = tau0[state - 1] + \
722 state / (float(use_avgAR[state - 1]))
723 for state
in reversed(xrange(1, N)):
724 tauN[state - 1] = tauN[state] \
725 + (N - (state - 1)) / (float(use_avgAR[state - 1]))
727 return tau0, tauN,
None,
None,
None
731 from itertools
import izip
736 store0 = zeros((N, N), dtype=bool)
737 last0 = [0
for i
in xrange(N)]
738 already0 = [
False for i
in xrange(N)]
739 times0 = [[]
for i
in xrange(N)]
740 storeN = zeros((N, N), dtype=bool)
741 lastN = [0
for i
in xrange(N)]
742 alreadyN = [
False for i
in xrange(N)]
743 timesN = [[]
for i
in xrange(N)]
746 for time, frame
in enumerate(izip(*replicanums)):
748 if not already0[frame[0]]:
749 last0[frame[0]] = time
750 store0[frame[0], :] =
True
751 already0[frame[0]] =
True
753 if not alreadyN[frame[-1]]:
754 lastN[frame[-1]] = time
755 storeN[frame[-1], :] =
True
756 alreadyN[frame[-1]] =
True
758 already0[frame[1]] =
False
759 alreadyN[frame[-2]] =
False
761 for state, rep
in enumerate(frame):
762 if store0[rep, state]:
764 store0[rep, state] =
False
766 times0[state].append(time - last0[rep])
767 if storeN[rep, state]:
769 storeN[rep, state] =
False
771 timesN[state].append(time - lastN[rep])
773 times = [[]
for i
in xrange(N)]
774 chose_N = [len(timesN[state]) > len(times0[state])
for state
in
776 for state
in xrange(N):
777 tauN[state] = sum(timesN[state]) / float(len(timesN[state]))
778 tau0[state] = sum(times0[state]) / float(len(times0[state]))
781 return tau0, tauN, chose_N, times0, timesN
784 def compute_effective_fraction(tau0, tauN, chose_N):
785 """input: tau0, tauN, chose_N
786 output: effective fraction f(T) (P_up(n)) as introduced in
787 Trebst S, Troyer M, Hansmann UHE, J Chem Phys *124* 174903 (2006).
789 Nadler W, Hansmann UHE, Phys Rev E *75* 026109 (2007)
790 and whose calculation is enhanced in
791 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
792 the ideal value of f(n) should be 1 - n/N
800 nstar = N - sum([int(a)
for a
in chose_N]) - 1
802 prdb(
"n* = %d" % nstar)
806 for state
in xrange(2, nstar + 1):
807 h0[state] = h0[state - 1] + \
808 (tau0[state] - tau0[state - 1]) / float(state)
812 for state
in reversed(xrange(nstar, N - 1)):
813 hN[state] = hN[state + 1] + \
814 (tauN[state] - tauN[state + 1]) / float(N - state)
819 for n
in xrange(1, nstar + 1):
820 pup[n] = 1 - h0[n] / (h0[nstar] + hN[nstar])
821 for n
in xrange(nstar + 1, N):
822 pup[n] = hN[n] / (h0[nstar] + hN[nstar])
827 def spline_diffusivity(pup, params):
828 """spline interpolation of diffusivity: D = 1/(df/dT * heta)
830 from numpy
import linspace
831 robjects.globalenv[
"hetay"] = \
832 robjects.FloatVector(linspace(0, 1, num=len(params)).tolist())
833 robjects.globalenv[
"hetax"] = robjects.FloatVector(params)
834 robjects.globalenv[
"pupx"] = robjects.FloatVector(params)
835 robjects.globalenv[
"pupy"] = robjects.FloatVector(pup)
836 heta = r(
'heta <- splinefun(hetax,hetay,method="monoH.FC")')
837 eff = r(
'eff <- splinefun(pupx,pupy,method="monoH.FC")')
838 diff = r(
'diff <- function(x) {-1/(heta(x,deriv=1)*eff(x,deriv=1))}')
839 return lambda x: diff(x)[0]
843 def compute_indicators(replicanums, subs=1, start=0):
844 """input: replicanums : a list of N lists of size M, where N is the number of
845 states and M is the length of the simulation. Each element is an integer,
846 and corresponds to the label of a replica.
847 output: an indicator function of exchanges (size (N-1)x(M-1)), 1 if exchange and
851 if replicanums[n][m] == replicanums[n + 1][m + 1] \
852 and replicanums[n][m + 1] == replicanums[n + 1][m]:
858 for n
in xrange(len(replicanums) - 1):
859 indicators.append([exchange(n, m)
860 for m
in xrange(len(replicanums[n]) - 1)][start::subs])
866 def update_params_nonergodic(pup, params, write_g=False, num=False):
868 from numpy
import linspace
870 g = linear_interpolation(zip(pup, params), 0)
872 d = spline_diffusivity(pup, params)
874 fl.write(
"".join([
"%f %f\n" % (x, g(x))
875 for x
in linspace(0, 1, num=100)]))
877 fl = open(
'diffusivity',
'w')
878 fl.write(
''.join([
"%f %f\n" % (x, d(x))
for x
in
879 linspace(params[0], params[-1], num=100)]))
881 fl = open(
'pup',
'w')
882 fl.write(
"".join([
"%f %f\n" % (i, j)
for (i, j)
in zip(params, pup)]))
886 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=len(params)))]
888 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=num))]
890 newparams[0] = params[0]
891 newparams[-1] = params[-1]
897 indicators, params, isGood, targetAR=0.4, immobilePoint=1,
898 Cv=
None, badMethod=
"dumb", goodMethod=
"dumb", dumb_scale=0.1):
899 """update the parameters according to the isGood tuple and using the
902 newparams = list(params)
904 if immobilePoint != 1:
905 raise NotImplementedError
907 if Cv
is None and (badMethod !=
"dumb" or goodMethod !=
"dumb"):
908 raise RuntimeError(
"""Cv needs to be estimated if using other methods
909 than 'dumb' for updating!""")
911 if goodMethod ==
"dumb":
912 update_good = update_good_dumb
913 elif goodMethod ==
"step":
914 update_good = update_any_cv_step
915 elif goodMethod ==
"sc":
916 update_good = update_any_cv_sc
917 elif goodMethod ==
"scfull":
918 update_good = update_any_cv_scfull
919 elif goodMethod ==
"nr":
920 update_good = update_any_cv_nr
922 raise NotImplementedError(goodMethod)
923 if badMethod ==
"dumb":
924 update_bad = update_bad_dumb
925 elif badMethod ==
"step":
926 update_bad = update_any_cv_step
927 elif badMethod ==
"sc":
928 update_bad = update_any_cv_sc
929 elif badMethod ==
"scfull":
930 update_bad = update_any_cv_scfull
931 elif badMethod ==
"nr":
932 update_bad = update_any_cv_nr
934 raise NotImplementedError(badMethod)
937 for pos
in xrange(len(params) - 1):
939 newparams[pos + 1] = update_good(
940 newparams[pos:pos + 2], params[pos:pos + 2],
941 indicators[pos], targetAR=targetAR, Cv=Cv)
943 newparams[pos + 1] = update_bad(
944 newparams[pos:pos + 2], params[pos:pos + 2],
945 indicators[pos], targetAR=targetAR, Cv=Cv, scale=dumb_scale)
947 return tuple(newparams)
950 def tune_params_flux(replicanums, params, subs=1, start=0, alpha=0.05,
951 testMethod=
'anova', meanMethod=
'binom', use_avgAR=
False,
952 power=0.8, num=
False):
957 if use_avgAR
is not False:
958 raise NotImplementedError
960 prdb(
"computing mean first passage times")
961 tau0, tauN, chose_N, times0, timesN = mean_first_passage_times(replicanums,
962 subs=subs, start=start, use_avgAR=use_avgAR)
964 prdb(
"average round trip time: %.2f (%d+%d events)" %
965 (tau0[-1] + tauN[0], len(times0[-1]), len(timesN[0])))
966 prdb(
"checking if the parameterset needs to be improved")
971 nstar = N - sum([int(a)
for a
in chose_N]) - 1
975 for n
in xrange(1, N - 1):
977 reduced.append([i * 2.0 / ((N - n) * (N - n + 1))
980 reduced.append([i * 2.0 / (n * (n + 1))
for i
in times0[n]])
982 anova_result = are_equal(reduced, alpha=alpha, method=testMethod,
984 if (anova_result[
'result']):
985 prdb(
"flux is constant, nothing to do!")
986 min_n = minimum_n(anova_result, alpha)
987 prdb(
'Try to rerun this test with at least %d more samples.' %
989 return (
False, min_n)
993 prdb(
"parameterset not optimal, computing effective fraction")
994 pup = compute_effective_fraction(tau0, tauN, chose_N)
997 prdb(
"returning new parameterset")
998 params = update_params_nonergodic(pup, params, num=num)
1000 return (
True, params)
1004 indicators, params, targetAR=0.4, alpha=0.05, immobilePoint=1, CvMethod=
"skip", badMethod=
"dumb", goodMethod=
"dumb",
1005 varMethod=
"skip", testMethod=
"anova", meanMethod=
"binom",
1006 energies=
None, temps=
None, power=0.8, dumb_scale=0.1):
1007 """Tune the replica-exchange parameters and return a new set.
1010 indicators -- an (N-1)x(M-1) table, where entry at position (j,i)
1011 is True if replica j and j+1 performed an exchange between
1013 params -- the current set of N parameters used in the simulation.
1016 targetAR -- the target AR which is wanted for the simulation
1018 alpha -- the type 1 error on the one-way ANOVA and subsequent
1019 t-tests (default: 5%)
1020 immobilePoint -- which replica should keep it's parameter fixed
1021 (default: 1st replica, e.g. 1)
1022 CvMethod -- the heat capacity estimation method (default:
1023 "skip", other options: "mbar", "spline", "constant")
1024 badMethod -- how to correct the (j+1)th parameter if the
1025 acceptance ratio between replicas j and j+1 is off the
1026 target value (default: "dumb", options: "step", "sc", "scfull", "nr")
1027 goodMethod -- how to update the value of the (j+1)th parameter
1028 in the case of a correctly exchanging couple, but if the jth
1029 parameter has been modified (default: "dumb",options: "step",
1030 "sc" self-consistent, "scfull" self-consistent using the exact equation,
1031 "nr" newton-raphson solver for the exact equation)
1032 dumb_scale -- (0.0-1.0) in the "dumb" method, scale wrong temperature
1033 intervals by this amount. (default: 0.1)
1034 testMethod -- how to test for the difference of the means,
1035 either "anova" for a one-way anova, or "kruskal" for a
1036 Kruskal-Wallis one-way non-parametric anova.
1037 meanMethod -- "ttest" for a two-sided one-sample t-test,
1038 "binom" for an exact binomial test of the probability.
1039 varMethod -- how to test for the equality of variances.
1040 "fligner" for the Fligner-Killeen non-parametric test,
1041 "bartlett" for Bartlett's test, "skip" to pass.
1042 energies -- if CvMethod is set to "mbar", the energies of each
1043 state as a function of time are used to estimate the heat capacity.
1044 temps -- the temperatures of the simulations, if estimating with "mbar".
1047 returns a tuple: (bool, params). bool is True if params have
1048 changed, and params is the new set.
1053 prdb(
"performing ANOVA")
1054 anova_result = are_equal(indicators, targetAR, alpha, method=testMethod,
1055 varMethod=varMethod, power=power)
1056 if (anova_result[
'result']
and
1057 are_equal_to_targetAR(indicators, targetAR, alpha, method=meanMethod)):
1058 prdb(
"all means are equal to target AR, nothing to do!")
1059 min_n = minimum_n(anova_result, alpha)
1061 'Try to rerun this test with at least %d more samples.' %
1063 return (
False, min_n)
1064 prdb(
"some means are different, performing t-tests")
1067 isGood = find_good_ARs(indicators, targetAR, alpha, method=meanMethod)
1068 if not (
False in isGood):
1069 prdb(
"""Bad luck: ANOVA says means are not identical, but t-tests
1070 can't find the bad rates...""")
1071 return (
False, params)
1075 prdb(
"performing stationarity test")
1076 if not are_stationnary(indicators, alpha):
1077 prdb(
"Warning: Some simulations are not stationary!")
1081 prdb(
"launching Cv estimation or skipping it")
1082 if CvMethod ==
"skip":
1084 elif CvMethod ==
"interpolate" or CvMethod ==
"constant":
1085 Cv =
CvEstimator(params, indicators=indicators, method=CvMethod)
1086 elif CvMethod ==
"mbar":
1093 raise NotImplementedError(CvMethod)
1096 prdb(
'updating params')
1098 params = update_params(indicators, params, isGood, targetAR=targetAR,
1099 immobilePoint=immobilePoint, Cv=Cv,
1100 badMethod=badMethod, goodMethod=goodMethod, dumb_scale=dumb_scale)
1103 return (
True, params)
1105 if __name__ ==
'__main__':
1108 for i
in xrange(1, 8):
1109 replicanums.append(tuple(fromfile(
'data/replica-indices/%d.rep' % i,
1110 dtype=int, sep=
'\n')))
1112 prdb(
"replicanums: %dx%d" % (len(replicanums), len(replicanums[0])))
1113 params = tuple(fromfile(
'data/temperatures', sep=
' '))
1116 indicators = compute_indicators(replicanums, subs=1, start=0)
1117 prdb(
"indicators: %dx%d" % (len(indicators), len(indicators[0])))
1118 prdb(
"Exchange rate:")
1119 prdb(array([sum(ind) / float(len(ind))
for ind
in indicators]))
1120 array([sum(ind) / float(len(ind))
for ind
in
1121 indicators]).tofile(
'xchgs', sep=
'\n')
1122 changed, newparams = tune_params(indicators, params, targetAR=0.25,
1123 badMethod=
"dumb", goodMethod=
"dumb", CvMethod=
"skip", testMethod=
"anova", alpha=0.05)
1125 print "Parameter set seems optimal."
1127 if not True in [abs(newparams[i + 1] - newparams[i]) < 1e-3
for i
in xrange(len(newparams) - 1)]:
1128 array(newparams).tofile(
'data/temperatures', sep=
' ')
1130 print "PROBLEM IN NEW PARAMETERSET -> not saved"
1131 print "params :", params
1132 print "new params:", newparams
def estimate_cv_constant
try to guess which constant cv fits best
def estimate_cv_interpolate
interpolate using previous values, by reversing the approximate overlap function
When created, estimates the heat capacity from the energies or from the indicator functions using the...
def mean_interp
estimate the mean of Cv between two points.
def estimate_cv_mbar
use MBAR to get the heat capacity
def get_mbar
returns the point estimate of the first derivative of the energy with respect to the replica exchange...
bool isnan(const T &a)
Return true if a number is NaN.
def get_interp
returns the point estimate of the first derivative of the energy with respect to the replica exchange...