5 This module provides a few methods to improve the efficiency of a
6 replica-exchange simulation by tuning its parameters.
11 import rpy2.robjects
as robjects
13 kB = 1.3806503 * 6.0221415 / 4184.0
27 robjects.globalenv[
"kB"] = kB
28 _rinverf = r(
'invErf <- function(x) {qnorm((1 + x) /2) / sqrt(2)}')
29 _rerf = r(
'erf <- function(x) {2 * pnorm(x * sqrt(2)) - 1}')
43 return _rinvF(x, d1, d2)[0]
46 def spline(xy, mean, method=None):
47 """spline interpolation of (x,y) coordinates. If interpolation goes
48 negative, replace by mean value.
51 robjects.globalenv[
"x"] = robjects.FloatVector(x)
52 robjects.globalenv[
"y"] = robjects.FloatVector(y)
56 r(
'cvsplinenonbounded <- splinefun(x,y)')
58 r(
'cvsplinenonbounded <- splinefun(x,y,method="%s")' % method)
60 'cvspline <- function(x) { tmp = cvsplinenonbounded(x); '
61 'if (tmp>0) {tmp} else {%f}}' %
70 def linear_interpolation(xy, mean):
71 """linear interpolation of (x,y) coordinates. No extrapolation possible.
74 robjects.globalenv[
"x"] = robjects.FloatVector(x)
75 robjects.globalenv[
"y"] = robjects.FloatVector(y)
78 _rinterp = r(
'cvspline <- approxfun(x,y)')
89 """perform anova using R and return statistic, p-value, between and
94 reps = r.rep(0, len(args[0]))
95 weight = robjects.FloatVector(args[0])
96 for i
in range(1, len(args)):
97 reps += r.rep(i, len(args[i]))
98 weight += robjects.FloatVector(args[i])
99 group = r.factor(reps)
100 robjects.globalenv[
"weight"] = weight
101 robjects.globalenv[
"group"] = group
102 lm = r.lm(
"weight ~ group")
106 anova_result = {
'fstat': aov[3][0],
108 'between': aov[2][0],
110 'nsteps': [len(i)
for i
in args],
117 """perform kruskal-wallis rank test"""
121 reps = r.rep(0, len(args[0]))
122 weight = robjects.FloatVector(args[0])
123 for i
in range(1, len(args)):
124 reps += r.rep(i, len(args[i]))
125 weight += robjects.FloatVector(args[i])
126 group = r.factor(reps)
127 aov = r(
'kruskal.test')(group, weight)
129 kruskal_result = {
'fstat': aov[0][0],
131 'nsteps': [len(i)
for i
in args],
134 return kruskal_result
137 def ttest(obs, target):
138 """perform a one-sample two-sided t-test on obs against target mean"""
139 test = r(
't.test')(robjects.IntVector(obs), mu=target)
140 return test[0][0], test[2][0]
143 def binom(obs, target):
144 """perform an exact binomial test on the mean of obs against target"""
147 test = r(
'binom.test')(success, trials, p=target)
148 return test[0][0], test[2][0]
152 """perform bartlett's test on the equality of variances of the
156 group = r.gl(ngroups, nreps)
157 weight = robjects.IntVector(args[0])
159 weight += robjects.IntVector(i)
160 robjects.globalenv[
"weight"] = weight
161 robjects.globalenv[
"group"] = group
162 var = r(
'bartlett.test')(weight, group)
163 return var[0][0], var[2][0]
167 """perform Fligner-Killeen non-parametric test of the variance equality"""
170 group = r.gl(ngroups, nreps)
171 weight = robjects.IntVector(args[0])
173 weight += robjects.IntVector(i)
174 robjects.globalenv[
"weight"] = weight
175 robjects.globalenv[
"group"] = group
176 var = r(
'fligner.test')(weight, group)
177 return var[0][0], var[2][0]
180 def power_test(ar, power=0.8, alpha=0.05):
181 """perform an anova power test and return
182 - the power of the test with this input data
183 - the number of points that would be needed to achieve a default
185 ar: the output of anova()
187 result = r(
'power.anova.test')(groups=ar[
'nreps'], n=min(ar[
'nsteps']),
188 between=ar[
'between'], within=ar[
'within'],
190 prdb(
'the power of this anova was: %.3f' % result[5][0])
191 result = r(
'power.anova.test')(groups=ar[
'nreps'],
192 between=ar[
'between'], within=ar[
'within'],
193 sig=alpha, pow=power)
194 prdb(
'To have a power of %.3f, there should be at least %d exchange '
195 'attempts.' % (power, result[1][0]))
199 def minimum_n(ar, alpha=0.05):
200 """This routine tries to return an estimate of the additional number of
201 exchange trials that could lead to a positive result of the anova (e.g.
202 the average ARs are not the same). It is still very crude. It also assumes
203 that a one-way anova was made.
204 ar: the output of anova()
208 nsteps = ar[
'nsteps']
214 return nsteps * (numpy.sqrt(Finv(1 - alpha, nreps - 1,
215 nreps * (nsteps - 1)) / fstat) - 1)
221 """When created, estimates the heat capacity from the energies or from the
222 indicator functions using the specified method. Two methods are then
223 available to guess the heat capacity at a given parameter value: get()
224 returns a single point, and mean() returns the linear average between two
228 def __init__(self, params, energies=None, indicators=None,
229 method=
"constant", temps=
None, write_cv=
False):
231 self.__initialized =
False
235 if method ==
"interpolate":
239 elif method ==
"constant":
241 self.get =
lambda p: self.__cv
242 self.mean =
lambda p1, p2: self.__cv
243 self.__cvfun = self.get
244 r(
'cvspline <- function(x) {%f}' % self.__cv)
245 elif method ==
"mbar":
248 self.mean = self.mean_mbar
250 raise NotImplementedError(method)
252 self.__initialized =
True
257 fl.write(
"".join([
"%f %f\n" % (x, self.get(x))
258 for x
in numpy.linspace(params[0] / 2, 2 * params[-1])]))
262 """interpolate using previous values, by reversing the approximate
265 if self.__initialized:
267 if len(indicators) != len(params) - 1:
269 "the length of indicators and params does not match!")
270 if params != tuple(sorted(params)):
271 raise NotImplementedError(
272 "unable to work on parameters that do not change "
275 prdb(
"storing params and means")
276 self.__params = params
277 self.__pmeans = [(params[i] + params[i + 1]) / 2.
278 for i
in range(len(params) - 1)]
279 prdb(
"computing __cv")
280 for i, ind
in enumerate(indicators):
281 mean = sum(ind) / float(len(ind))
282 Y2 = 2 * kB * erfinv(1 - mean) ** 2
287 (p1 ** 2 + p2 ** 2) * float(Y2) / (p2 - p1) ** 2))
290 self.__cvmean = sum([i[1]
for i
in self.__cv]) / float(len(self.__cv))
291 if self.__cvmean < 0:
292 raise ValueError(
"Cv mean is negative!")
293 self.__cvfun = spline(self.__cv, self.__cvmean)
297 """try to guess which constant cv fits best"""
298 if self.__initialized:
301 self.__cv = self.__cvmean
304 def needs_init(self):
305 if not self.__initialized:
306 raise RuntimeError(
"Class was not initialized correctly!")
309 "use MBAR to get the heat capacity"
310 raise NotImplementedError(
"estimate_cv_mbar")
312 def _isinbounds(self, p, params):
313 """returns True if p is within params, else false. the params list
314 must be sorted ascendingly."""
315 if p < params[0] - EPSILON
or p > params[-1] + EPSILON:
322 def _interpolate(self, xval, xlist):
323 """return interpolation of Cv at point xval, and return the average
324 instead if this value is negative.
326 self._isinbounds(xval, xlist)
327 val = self.__cvfun(xval)
334 """returns the point estimate of the first derivative of the energy
335 with respect to the replica exchange parameter (usually T or q).
336 This version assumes that the means of cv are given.
339 return self._interpolate(param, self.__pmeans)
342 """returns the point estimate of the first derivative of the energy
343 with respect to the replica exchange parameter (usually T or q).
344 This version assumes that the values of cv are given.
347 return self._interpolate(param, self.__params)
350 """estimate the mean of Cv between two points. Here the means were
354 return self._interpolate((pa + pb) / 2., self.__pmeans)
356 def mean_mbar(self, pa, pb):
363 def update_good_dumb(newp, oldp, *args, **kwargs):
364 """Here the old parameters are oldp[0] and oldp[1], and the starting point
365 is newp[0]. We should modify newp[1] so that the AR in the following cycle
366 is equal to the targetAR.
367 In the "dumb" method, the Cv and targetAR keywords are ignored.
368 Here the newp[1] parameter is modified because prior changes have set
369 newp[0] to a different value than oldp[0]. Thus, we should move newp[1] by
370 minimizing the effect on the AR since it is supposedly equal to targetAR.
371 In this simple method, the parameter is just translated.
374 "newp[0] has moved (%.3f -> %.3f), adjusting the position of newp[1]" %
376 return oldp[1] - (oldp[0] - newp[0])
379 def update_bad_dumb(newp, oldp, ind, targetAR=0.4, scale=0.1, **kwargs):
380 """Here the old parameters are oldp[0] and oldp[1], and the starting point
381 is newp[0]. We should modify newp[1] so that the AR in the following cycle
382 is equal to the targetAR.
383 In the "dumb" method, the Cv keyword is ignored. Here the newp[1]
384 parameter is modified to follow the possible translation of newp[0] by
385 calling update_good_dumb, and then newp[1] is added or subtracted scale% of
386 (oldp[1] - oldp[0]) to adjust to targetAR.
389 if newp[0] != oldp[0]:
390 newp[1] = update_good_dumb(newp, oldp)
391 if targetAR > sum(ind) / float(len(ind)):
392 prdb(
"""target AR is higher than expected, decreasing newp[1]""")
393 newp[1] -= scale * (oldp[1] - oldp[0])
395 prdb(
"""target AR is lower than expected, increasing newp[1]""")
396 newp[1] += scale * (oldp[1] - oldp[0])
400 def update_any_cv_step(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
401 """here we use the average AR formula of two gaussians to get newp[1] as a
402 function of newp[1], knowing the targetAR and estimating the Cv. If
403 targetAR is negative, consider that mean(ind) equals the target AR and
404 skip any calculation in the case that oldp[0] equals newp[0].
405 step: suppose the heat capacity is stepwise constant, i.e. use the heat
406 capacity at position newp[0] as an estimate of the mean of the heat
407 capacity between newp[0] and newp[1]. This does not require any
408 self-consistent loop.
413 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
416 targetAR = sum(ind) / float(len(ind))
418 Y = numpy.sqrt(2 * kB) * float(erfinv(1 - targetAR))
420 raise ValueError(
"""targetAR too small for this approximate method, use
421 the full self-consistent method instead.""")
422 return newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
425 def update_any_cv_sc(newp, oldp, ind, targetAR=0.4, Cv=None,
426 tol=1e-6, maxiter=10000):
427 """self-consistent solver version"""
431 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
434 targetAR = sum(ind) / float(len(ind))
436 Y = numpy.sqrt(2 * kB) * float(erfinv(1 - targetAR))
438 raise ValueError(
"""targetAR too small for this approximate method, use
439 the full self-consistent method instead.""")
440 targetp = newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
441 for i
in range(maxiter):
442 cv = Cv.mean(newp[0], targetp)
443 (oldtargetp, targetp) = (
444 targetp, newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2))
446 if abs(targetp - oldtargetp) <= tol:
448 if numpy.isnan(targetp):
451 "targetAR too small for this approximate method, use the "
452 "full self-consistent method instead.")
454 raise ValueError(
"""something unexpected happened""")
456 prdb(
"""Warning: unable to converge the self-consistent after %d
457 iterations and a tolerance of %f. Change the method or decrease the
458 tolerance!""" % (maxiter, tol))
459 prdb(
"converged after %d iterations and a tolerance of %f for x=%f" %
464 def update_any_cv_scfull(newp, oldp, ind, targetAR=0.4, Cv=None,
465 tol=1e-6, maxiter=10000):
466 """self-consistent solver version, on the exact average AR equation"""
469 _ = r(
'u21 <- function(t1,t2) { integrate(Vectorize(cvspline),'
471 _ = r(
'b21 <- function(t1,t2) { 1./(kB*t2) - 1./(kB*t1) }')
472 _ = r(
'sigma2 <- function(t) {cvspline(t)*kB*t**2}')
473 _rovboltz = r(
'ovboltz <- function(t1,t2) {\
475 u21(t1,t2)/sqrt(2*(sigma2(t1)+sigma2(t2))))\
476 + exp(b21(t1,t2)*(u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2))/2))\
477 * (1+erf((u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2)))\
478 /(sqrt(2*(sigma2(t1)+sigma2(t2))))))\
481 'rootfn <- function(t2) {ovboltz(%f,t2)-%f}' %
485 if oldp[1] > oldp[0]:
490 while _rrootfn(tmp)[0] >= 0:
492 tmp += (oldp[1] - oldp[0])
494 raise RuntimeError(
"heat capacity goes negative")
496 raise RuntimeError(
'could not find zero of function!')
499 _runiroot = r(
'uniroot(rootfn,c(%f,%f),f.lower = %f, f.upper = %f, '
500 'tol = %f, maxiter = %d)'
501 % (newp[0], tmp, 1 - targetAR, -targetAR, tol, maxiter))
502 prdb(
"self-consistent solver converged after %s iterations and an "
503 "estimated precision of %s " % (_runiroot[2][0], _runiroot[3][0]))
509 _runiroot[0][0])[0]])
510 return _runiroot[0][0]
513 def update_any_cv_nr(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
514 """newton-raphson solver version"""
517 raise NotImplementedError
522 def are_equal_to_targetAR(
527 """here, all indicators have same average, we want to know if it is
532 deviations = sorted([(abs(sum(ind) / float(len(ind)) - targetAR), ind)
533 for pos, ind
in enumerate(indicators)])
534 deviant = deviations[-1]
537 if method ==
"ttest":
540 elif method ==
"binom":
543 raise NotImplementedError
546 test, pval = our_ttest(deviant[1], targetAR)
548 if abs(targetAR - sum(deviant[1]) / len(deviant[1])) > EPSILON:
558 def are_stationnary(indicators, alpha=0.05, method="anova"):
559 """test on the stationarity of the observations (block analysis). Done
560 so by launching an anova on the difference between the two halves of
564 if method ==
"kruskal":
569 tmp = numpy.array(indicators)
570 blocklen = len(indicators[0]) / 2
571 block = tmp[:, :blocklen] - tmp[:, blocklen:2 * blocklen]
572 if test(*block)[
'pval'] < alpha:
578 def are_equal(indicators, targetAR=0.4, alpha=0.05,
579 method=
"anova", varMethod=
"skip", power=0.8):
580 """Perform a one-way ANOVA or kruskal-wallis test on the indicators set,
581 and return True if one cannot exclude with risk alpha that the indicators
582 AR are different (i.e. True = all means are equal). Also performs a test
583 on the variance (they must be equal).
586 if min(targetAR, 1 - targetAR) * len(indicators[0]) <= 5
and \
587 (varMethod ==
"bartlett" or method ==
"anova"):
588 prdb(
"Warning: normal approximation to the binomial does not hold!")
591 if varMethod ==
"skip":
594 if varMethod ==
"bartlett":
595 pval = bartlett(*indicators)[1]
596 elif varMethod ==
"fligner":
597 pval = fligner(*indicators)[1]
599 raise NotImplementedError(
600 "variance testing method unknown: %s" %
603 prdb(
"Warning: performing mean test with unequal variances.")
605 if method ==
"kruskal":
610 tr = test(*indicators)
613 tr[
'result'] = tr[
'pval'] >= alpha
618 def find_good_ARs(indicators, targetAR=0.4, alpha=0.05, method="binom"):
619 """perform one-sample t-tests on each of the data sets, and return
620 a tuple of bool of size N-1, False if AR i is not equal to targetAR
625 means = sorted([(sum(ind) / float(len(ind)), pos, ind)
626 for pos, ind
in enumerate(indicators)])
629 if method ==
"ttest":
632 elif method ==
"binom":
635 raise NotImplementedError
639 prdb(
"starting left")
640 for (i, (mean, pos, ind))
in enumerate(means):
641 prdb(
"performing t-test on couple %d having average AR %f, "
642 "position %d" % (pos, mean, i))
644 test, pval = our_ttest(ind, targetAR)
646 if abs(targetAR - mean) > EPSILON:
652 isGoodTuple.append((pos,
False))
657 prdb(
"starting right")
658 for (i, (mean, pos, ind))
in enumerate(reversed(means)):
659 prdb(
"performing t-test on couple %d having average AR %f, position %d"
660 % (pos, mean, len(means) - 1 - i))
661 if our_ttest(ind, targetAR)[1] < alpha:
663 isGoodTuple.append((pos,
False))
665 goodstop = len(means) - 1 - i
669 if len(isGoodTuple) > len(indicators):
670 return tuple([
False] * len(indicators))
672 elif len(isGoodTuple) == 0:
673 return tuple([
True] * len(indicators))
676 isGoodTuple.extend([(means[i][1],
True)
for i
in
677 range(goodstart, goodstop + 1)])
679 return tuple([tup[1]
for tup
in isGoodTuple])
684 def mean_first_passage_times(
689 """compute mean first passage times as suggested in
690 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
692 use_avgAR : if a list of average ARs is given computes everything from
693 average AR; if it is False, compute by direct counting of events.
696 If use_avgAR == False:
697 tau0, tauN, chose_N, times0, timesN
699 tau0, tauN, None, None, None
700 tau0[i]: average time to go from replica 0 to replica i
701 tauN[i]: average time to go from replica N to replica i
702 times0 and timesN are the corresponding lists of single events.
706 from numpy
import array, zeros
707 replicanums = array(replicanums_ori)[:, start::subs]
715 for state
in range(1, N):
716 tau0[state] = tau0[state - 1] + \
717 state / (float(use_avgAR[state - 1]))
718 for state
in reversed(range(1, N)):
719 tauN[state - 1] = tauN[state] \
720 + (N - (state - 1)) / (float(use_avgAR[state - 1]))
722 return tau0, tauN,
None,
None,
None
726 if sys.version_info[0] >= 3:
729 from itertools
import izip
734 store0 = zeros((N, N), dtype=bool)
735 last0 = [0
for i
in range(N)]
736 already0 = [
False for i
in range(N)]
737 times0 = [[]
for i
in range(N)]
738 storeN = zeros((N, N), dtype=bool)
739 lastN = [0
for i
in range(N)]
740 alreadyN = [
False for i
in range(N)]
741 timesN = [[]
for i
in range(N)]
744 for time, frame
in enumerate(izip(*replicanums)):
746 if not already0[frame[0]]:
747 last0[frame[0]] = time
748 store0[frame[0], :] =
True
749 already0[frame[0]] =
True
751 if not alreadyN[frame[-1]]:
752 lastN[frame[-1]] = time
753 storeN[frame[-1], :] =
True
754 alreadyN[frame[-1]] =
True
756 already0[frame[1]] =
False
757 alreadyN[frame[-2]] =
False
759 for state, rep
in enumerate(frame):
760 if store0[rep, state]:
762 store0[rep, state] =
False
764 times0[state].append(time - last0[rep])
765 if storeN[rep, state]:
767 storeN[rep, state] =
False
769 timesN[state].append(time - lastN[rep])
772 chose_N = [len(timesN[state]) > len(times0[state])
for state
in
774 for state
in range(N):
775 tauN[state] = sum(timesN[state]) / float(len(timesN[state]))
776 tau0[state] = sum(times0[state]) / float(len(times0[state]))
779 return tau0, tauN, chose_N, times0, timesN
782 def compute_effective_fraction(tau0, tauN, chose_N):
783 """input: tau0, tauN, chose_N
784 output: effective fraction f(T) (P_up(n)) as introduced in
785 Trebst S, Troyer M, Hansmann UHE, J Chem Phys *124* 174903 (2006).
787 Nadler W, Hansmann UHE, Phys Rev E *75* 026109 (2007)
788 and whose calculation is enhanced in
789 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
790 the ideal value of f(n) should be 1 - n/N
798 nstar = N - sum([int(a)
for a
in chose_N]) - 1
800 prdb(
"n* = %d" % nstar)
804 for state
in range(2, nstar + 1):
805 h0[state] = h0[state - 1] + \
806 (tau0[state] - tau0[state - 1]) / float(state)
810 for state
in reversed(range(nstar, N - 1)):
811 hN[state] = hN[state + 1] + \
812 (tauN[state] - tauN[state + 1]) / float(N - state)
817 for n
in range(1, nstar + 1):
818 pup[n] = 1 - h0[n] / (h0[nstar] + hN[nstar])
819 for n
in range(nstar + 1, N):
820 pup[n] = hN[n] / (h0[nstar] + hN[nstar])
825 def spline_diffusivity(pup, params):
826 """spline interpolation of diffusivity: D = 1/(df/dT * heta)
828 from numpy
import linspace
829 robjects.globalenv[
"hetay"] = \
830 robjects.FloatVector(linspace(0, 1, num=len(params)).tolist())
831 robjects.globalenv[
"hetax"] = robjects.FloatVector(params)
832 robjects.globalenv[
"pupx"] = robjects.FloatVector(params)
833 robjects.globalenv[
"pupy"] = robjects.FloatVector(pup)
834 _ = r(
'heta <- splinefun(hetax,hetay,method="monoH.FC")')
835 _ = r(
'eff <- splinefun(pupx,pupy,method="monoH.FC")')
836 diff = r(
'diff <- function(x) {-1/(heta(x,deriv=1)*eff(x,deriv=1))}')
837 return lambda x: diff(x)[0]
841 def compute_indicators(replicanums, subs=1, start=0):
842 """input: replicanums : a list of N lists of size M, where N is the number
843 of states and M is the length of the simulation. Each element is an
844 integer, and corresponds to the label of a replica.
845 output: an indicator function of exchanges (size (N-1)x(M-1)), 1 if
846 exchange and 0 if not.
849 if replicanums[n][m] == replicanums[n + 1][m + 1] \
850 and replicanums[n][m + 1] == replicanums[n + 1][m]:
856 for n
in range(len(replicanums) - 1):
859 for m
in range(len(replicanums[n]) - 1)][start::subs])
865 def update_params_nonergodic(pup, params, write_g=False, num=False):
867 from numpy
import linspace
869 g = linear_interpolation(list(zip(pup, params)), 0)
871 d = spline_diffusivity(pup, params)
873 fl.write(
"".join([
"%f %f\n" % (x, g(x))
874 for x
in linspace(0, 1, num=100)]))
876 fl = open(
'diffusivity',
'w')
877 fl.write(
''.join([
"%f %f\n" % (x, d(x))
for x
in
878 linspace(params[0], params[-1], num=100)]))
880 fl = open(
'pup',
'w')
881 fl.write(
"".join([
"%f %f\n" % (i, j)
for (i, j)
in zip(params, pup)]))
885 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=len(params)))]
887 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=num))]
889 newparams[0] = params[0]
890 newparams[-1] = params[-1]
896 indicators, params, isGood, targetAR=0.4, immobilePoint=1,
897 Cv=
None, badMethod=
"dumb", goodMethod=
"dumb", dumb_scale=0.1):
898 """update the parameters according to the isGood tuple and using the
901 newparams = list(params)
903 if immobilePoint != 1:
904 raise NotImplementedError
906 if Cv
is None and (badMethod !=
"dumb" or goodMethod !=
"dumb"):
907 raise RuntimeError(
"""Cv needs to be estimated if using other methods
908 than 'dumb' for updating!""")
910 if goodMethod ==
"dumb":
911 update_good = update_good_dumb
912 elif goodMethod ==
"step":
913 update_good = update_any_cv_step
914 elif goodMethod ==
"sc":
915 update_good = update_any_cv_sc
916 elif goodMethod ==
"scfull":
917 update_good = update_any_cv_scfull
918 elif goodMethod ==
"nr":
919 update_good = update_any_cv_nr
921 raise NotImplementedError(goodMethod)
922 if badMethod ==
"dumb":
923 update_bad = update_bad_dumb
924 elif badMethod ==
"step":
925 update_bad = update_any_cv_step
926 elif badMethod ==
"sc":
927 update_bad = update_any_cv_sc
928 elif badMethod ==
"scfull":
929 update_bad = update_any_cv_scfull
930 elif badMethod ==
"nr":
931 update_bad = update_any_cv_nr
933 raise NotImplementedError(badMethod)
936 for pos
in range(len(params) - 1):
938 newparams[pos + 1] = update_good(
939 newparams[pos:pos + 2], params[pos:pos + 2],
940 indicators[pos], targetAR=targetAR, Cv=Cv)
942 newparams[pos + 1] = update_bad(
943 newparams[pos:pos + 2], params[pos:pos + 2],
944 indicators[pos], targetAR=targetAR, Cv=Cv, scale=dumb_scale)
946 return tuple(newparams)
949 def tune_params_flux(replicanums, params, subs=1, start=0, alpha=0.05,
950 testMethod=
'anova', meanMethod=
'binom', use_avgAR=
False,
951 power=0.8, num=
False):
956 if use_avgAR
is not False:
957 raise NotImplementedError
959 prdb(
"computing mean first passage times")
960 tau0, tauN, chose_N, times0, timesN = mean_first_passage_times(
961 replicanums, subs=subs, start=start, use_avgAR=use_avgAR)
963 prdb(
"average round trip time: %.2f (%d+%d events)" %
964 (tau0[-1] + tauN[0], len(times0[-1]), len(timesN[0])))
965 prdb(
"checking if the parameterset needs to be improved")
970 nstar = N - sum([int(a)
for a
in chose_N]) - 1
974 for n
in range(1, N - 1):
976 reduced.append([i * 2.0 / ((N - n) * (N - n + 1))
979 reduced.append([i * 2.0 / (n * (n + 1))
for i
in times0[n]])
981 anova_result = are_equal(reduced, alpha=alpha, method=testMethod,
983 if (anova_result[
'result']):
984 prdb(
"flux is constant, nothing to do!")
985 min_n = minimum_n(anova_result, alpha)
986 prdb(
'Try to rerun this test with at least %d more samples.' %
988 return (
False, min_n)
992 prdb(
"parameterset not optimal, computing effective fraction")
993 pup = compute_effective_fraction(tau0, tauN, chose_N)
996 prdb(
"returning new parameterset")
997 params = update_params_nonergodic(pup, params, num=num)
999 return (
True, params)
1002 def tune_params_ar(indicators, params, targetAR=0.4, alpha=0.05,
1003 immobilePoint=1, CvMethod=
"skip", badMethod=
"dumb",
1004 goodMethod=
"dumb", varMethod=
"skip", testMethod=
"anova",
1005 meanMethod=
"binom", energies=
None, temps=
None, power=0.8,
1007 """Tune the replica-exchange parameters and return a new set.
1010 indicators -- an (N-1)x(M-1) table, where entry at position (j,i)
1011 is True if replica j and j+1 performed an exchange between
1013 params -- the current set of N parameters used in the simulation.
1016 targetAR -- the target AR which is wanted for the simulation
1018 alpha -- the type 1 error on the one-way ANOVA and subsequent
1019 t-tests (default: 5%)
1020 immobilePoint -- which replica should keep it's parameter fixed
1021 (default: 1st replica, e.g. 1)
1022 CvMethod -- the heat capacity estimation method (default:
1023 "skip", other options: "mbar", "spline", "constant")
1024 badMethod -- how to correct the (j+1)th parameter if the
1025 acceptance ratio between replicas j and j+1 is off the
1026 target value (default: "dumb", options: "step", "sc",
1028 goodMethod -- how to update the value of the (j+1)th parameter
1029 in the case of a correctly exchanging couple, but if the jth
1030 parameter has been modified (default: "dumb",options: "step",
1031 "sc" self-consistent, "scfull" self-consistent using the exact
1032 equation, "nr" newton-raphson solver for the exact equation)
1033 dumb_scale -- (0.0-1.0) in the "dumb" method, scale wrong temperature
1034 intervals by this amount. (default: 0.1)
1035 testMethod -- how to test for the difference of the means,
1036 either "anova" for a one-way anova, or "kruskal" for a
1037 Kruskal-Wallis one-way non-parametric anova.
1038 meanMethod -- "ttest" for a two-sided one-sample t-test,
1039 "binom" for an exact binomial test of the probability.
1040 varMethod -- how to test for the equality of variances.
1041 "fligner" for the Fligner-Killeen non-parametric test,
1042 "bartlett" for Bartlett's test, "skip" to pass.
1043 energies -- if CvMethod is set to "mbar", the energies of each
1044 state as a function of time are used to estimate the heat capacity.
1045 temps -- the temperatures of the simulations, if estimating
1049 returns a tuple: (bool, params). bool is True if params have
1050 changed, and params is the new set.
1055 prdb(
"performing ANOVA")
1056 anova_result = are_equal(indicators, targetAR, alpha, method=testMethod,
1057 varMethod=varMethod, power=power)
1058 if (anova_result[
'result']
1059 and are_equal_to_targetAR(indicators, targetAR, alpha,
1060 method=meanMethod)):
1061 prdb(
"all means are equal to target AR, nothing to do!")
1062 min_n = minimum_n(anova_result, alpha)
1064 'Try to rerun this test with at least %d more samples.' %
1066 return (
False, min_n)
1067 prdb(
"some means are different, performing t-tests")
1070 isGood = find_good_ARs(indicators, targetAR, alpha, method=meanMethod)
1071 if not (
False in isGood):
1072 prdb(
"""Bad luck: ANOVA says means are not identical, but t-tests
1073 can't find the bad rates...""")
1074 return (
False, params)
1078 prdb(
"performing stationarity test")
1079 if not are_stationnary(indicators, alpha):
1080 prdb(
"Warning: Some simulations are not stationary!")
1084 prdb(
"launching Cv estimation or skipping it")
1085 if CvMethod ==
"skip":
1087 elif CvMethod ==
"interpolate" or CvMethod ==
"constant":
1088 Cv =
CvEstimator(params, indicators=indicators, method=CvMethod)
1089 elif CvMethod ==
"mbar":
1096 raise NotImplementedError(CvMethod)
1099 prdb(
'updating params')
1101 params = update_params(indicators, params, isGood, targetAR=targetAR,
1102 immobilePoint=immobilePoint, Cv=Cv,
1103 badMethod=badMethod, goodMethod=goodMethod,
1104 dumb_scale=dumb_scale)
1107 return (
True, params)
1110 if __name__ ==
'__main__':
1113 for i
in range(1, 8):
1115 tuple(numpy.fromfile(
'data/replica-indices/%d.rep' % i,
1116 dtype=int, sep=
'\n')))
1118 prdb(
"replicanums: %dx%d" % (len(replicanums), len(replicanums[0])))
1119 params = tuple(numpy.fromfile(
'data/temperatures', sep=
' '))
1122 indicators = compute_indicators(replicanums, subs=1, start=0)
1123 prdb(
"indicators: %dx%d" % (len(indicators), len(indicators[0])))
1124 prdb(
"Exchange rate:")
1125 prdb(numpy.array([sum(ind) / float(len(ind))
for ind
in indicators]))
1126 numpy.array([sum(ind) / float(len(ind))
1127 for ind
in indicators]).tofile(
'xchgs', sep=
'\n')
1128 changed, newparams = tune_params_ar(
1129 indicators, params, targetAR=0.25,
1130 badMethod=
"dumb", goodMethod=
"dumb", CvMethod=
"skip",
1131 testMethod=
"anova", alpha=0.05)
1133 print(
"Parameter set seems optimal.")
1135 if True not in [abs(newparams[i + 1] - newparams[i]) < 1e-3
1136 for i
in range(len(newparams) - 1)]:
1137 numpy.array(newparams).tofile(
'data/temperatures', sep=
' ')
1139 print(
"PROBLEM IN NEW PARAMETERSET -> not saved")
1140 print(
"params :", params)
1141 print(
"new params:", newparams)
def estimate_cv_constant
try to guess which constant cv fits best
def estimate_cv_interpolate
interpolate using previous values, by reversing the approximate overlap function
When created, estimates the heat capacity from the energies or from the indicator functions using the...
def mean_interp
estimate the mean of Cv between two points.
def estimate_cv_mbar
use MBAR to get the heat capacity
def get_mbar
returns the point estimate of the first derivative of the energy with respect to the replica exchange...
def get_interp
returns the point estimate of the first derivative of the energy with respect to the replica exchange...