3 from __future__
import print_function
6 This module provides a few methods to improve the efficiency of a replica-exchange simulation
7 by tuning its parameters.
16 from numpy.random
import randint
17 import rpy2.robjects
as robjects
19 kB = 1.3806503 * 6.0221415 / 4184.0
33 robjects.globalenv[
"kB"] = kB
34 _rinverf = r(
'invErf <- function(x) {qnorm((1 + x) /2) / sqrt(2)}')
35 _rerf = r(
'erf <- function(x) {2 * pnorm(x * sqrt(2)) - 1}')
49 return _rinvF(x, d1, d2)[0]
52 def spline(xy, mean, method=None):
53 """spline interpolation of (x,y) coordinates. If interpolation goes
54 negative, replace by mean value.
57 robjects.globalenv[
"x"] = robjects.FloatVector(x)
58 robjects.globalenv[
"y"] = robjects.FloatVector(y)
62 r(
'cvsplinenonbounded <- splinefun(x,y)')
64 r(
'cvsplinenonbounded <- splinefun(x,y,method="%s")' % method)
66 'cvspline <- function(x) { tmp = cvsplinenonbounded(x); if (tmp>0) {tmp} else {%f}}' %
75 def linear_interpolation(xy, mean):
76 """linear interpolation of (x,y) coordinates. No extrapolation possible.
79 robjects.globalenv[
"x"] = robjects.FloatVector(x)
80 robjects.globalenv[
"y"] = robjects.FloatVector(y)
83 _rinterp = r(
'cvspline <- approxfun(x,y)')
94 """perform anova using R and return statistic, p-value, between and within variance"""
98 reps = r.rep(0, len(args[0]))
99 weight = robjects.FloatVector(args[0])
100 for i
in range(1, len(args)):
101 reps += r.rep(i, len(args[i]))
102 weight += robjects.FloatVector(args[i])
103 group = r.factor(reps)
104 robjects.globalenv[
"weight"] = weight
105 robjects.globalenv[
"group"] = group
106 lm = r.lm(
"weight ~ group")
110 anova_result = {
'fstat': aov[3][0],
112 'between': aov[2][0],
114 'nsteps': [len(i)
for i
in args],
121 """perform kruskal-wallis rank test"""
125 reps = r.rep(0, len(args[0]))
126 weight = robjects.FloatVector(args[0])
127 for i
in range(1, len(args)):
128 reps += r.rep(i, len(args[i]))
129 weight += robjects.FloatVector(args[i])
130 group = r.factor(reps)
131 aov = r(
'kruskal.test')(group, weight)
133 kruskal_result = {
'fstat': aov[0][0],
135 'nsteps': [len(i)
for i
in args],
138 return kruskal_result
141 def ttest(obs, target):
142 """perform a one-sample two-sided t-test on obs against target mean"""
143 test = r(
't.test')(robjects.IntVector(obs), mu=target)
144 return test[0][0], test[2][0]
147 def binom(obs, target):
148 """perform an exact binomial test on the mean of obs against target"""
151 test = r(
'binom.test')(success, trials, p=target)
152 return test[0][0], test[2][0]
156 """perform bartlett's test on the equality of variances of the observations"""
159 group = r.gl(ngroups, nreps)
160 weight = robjects.IntVector(args[0])
162 weight += robjects.IntVector(i)
163 robjects.globalenv[
"weight"] = weight
164 robjects.globalenv[
"group"] = group
165 var = r(
'bartlett.test')(weight, group)
166 return var[0][0], var[2][0]
170 """perform Fligner-Killeen non-parametric test of the variance equality"""
173 group = r.gl(ngroups, nreps)
174 weight = robjects.IntVector(args[0])
176 weight += robjects.IntVector(i)
177 robjects.globalenv[
"weight"] = weight
178 robjects.globalenv[
"group"] = group
179 var = r(
'fligner.test')(weight, group)
180 return var[0][0], var[2][0]
183 def power_test(ar, power=0.8, alpha=0.05):
184 """perform an anova power test and return
185 - the power of the test with this input data
186 - the number of points that would be needed to achieve a default power of 0.8
187 ar: the ouput of anova()
189 result = r(
'power.anova.test')(groups=ar[
'nreps'], n=min(ar[
'nsteps']),
190 between=ar[
'between'], within=ar[
'within'], sig=alpha)
191 prdb(
'the power of this anova was: %.3f' % result[5][0])
192 result = r(
'power.anova.test')(groups=ar[
'nreps'],
193 between=ar[
'between'], within=ar[
'within'], sig=alpha, pow=power)
195 'To have a power of %.3f, there should be at least %d exchange attemps.' %
196 (power, result[1][0]))
200 def minimum_n(ar, alpha=0.05):
201 """This routine tries to return an estimate of the additional number of exchange trials that
202 could lead to a positive result of the anova (e.g. the average ARs are not the same). It is
203 still very crude. It also assumes that a one-way anova was made.
204 ar: the output of anova()
208 nsteps = ar[
'nsteps']
216 (sqrt(Finv(1 - alpha, nreps - 1, nreps * (nsteps - 1)) / fstat) - 1)
223 """When created, estimates the heat capacity from the energies or from the
224 indicator functions using the specified method. Two methods are then
225 available to guess the heat capacity at a given parameter value: get()
226 returns a single point, and mean() returns the linear average between two
230 def __init__(self, params, energies=None, indicators=None,
231 method=
"constant", temps=
None, write_cv=
False):
233 kB = 1.3806503 * 6.0221415 / 4184.0
234 self.__initialized =
False
238 if method ==
"interpolate":
242 elif method ==
"constant":
244 self.get =
lambda p: self.__cv
245 self.mean =
lambda p1, p2: self.__cv
246 self.__cvfun = self.get
247 r(
'cvspline <- function(x) {%f}' % self.__cv)
248 elif method ==
"mbar":
251 self.mean = self.mean_mbar
253 raise NotImplementedError(method)
255 self.__initialized =
True
260 fl.write(
"".join([
"%f %f\n" % (x, self.get(x))
261 for x
in linspace(params[0] / 2, 2 * params[-1])]))
265 """interpolate using previous values, by reversing the approximate overlap
268 if self.__initialized:
270 if len(indicators) != len(params) - 1:
272 "the length of indicators and params does not match!")
273 if params != tuple(sorted(params)):
274 raise NotImplementedError(
275 "unable to work on parameters that do not change monotonically")
277 prdb(
"storing params and means")
278 self.__params = params
279 self.__pmeans = [(params[i] + params[i + 1]) / 2.
280 for i
in range(len(params) - 1)]
281 prdb(
"computing __cv")
282 for i, ind
in enumerate(indicators):
283 mean = sum(ind) / float(len(ind))
284 Y2 = 2 * kB * erfinv(1 - mean) ** 2
288 (self.__pmeans[i], (p1 ** 2 + p2 ** 2) * float(Y2) / (p2 - p1) ** 2))
291 self.__cvmean = sum([i[1]
for i
in self.__cv]) / float(len(self.__cv))
292 if self.__cvmean < 0:
293 raise ValueError(
"Cv mean is negative!")
294 self.__cvfun = spline(self.__cv, self.__cvmean)
298 """try to guess which constant cv fits best"""
299 if self.__initialized:
302 self.__cv = self.__cvmean
305 def needs_init(self):
306 if not self.__initialized:
307 raise RuntimeError(
"Class was not initialized correctly!")
310 "use MBAR to get the heat capacity"
311 raise NotImplementedError(method)
312 if self.__initialized:
315 def _isinbounds(self, p, params):
316 """returns True if p is within parms, else false. the params list must be sorted ascendingly."""
317 if p < params[0] - EPSILON
or p > params[-1] + EPSILON:
323 def _interpolate(self, xval, xlist):
324 """return interpolation of Cv at point xval, and return the average instead
325 if this value is negative.
327 self._isinbounds(xval, xlist)
328 val = self.__cvfun(xval)
335 """returns the point estimate of the first derivative of the energy with
336 respect to the replica exchange parameter (usually T or q).
337 This version assumes that the means of cv are given.
340 return self._interpolate(param, self.__pmeans)
343 """returns the point estimate of the first derivative of the energy with
344 respect to the replica exchange parameter (usually T or q).
345 This version assumes that the values of cv are given.
348 return self._interpolate(param, self.__params)
351 """estimate the mean of Cv between two points. Here the means were
355 return self._interpolate((pa + pb) / 2., self.__pmeans)
357 def mean_mbar(self, pa, pb):
364 def update_good_dumb(newp, oldp, *args, **kwargs):
365 """Here the old parameters are oldp[0] and oldp[1], and the starting point
366 is newp[0]. We should modify newp[1] so that the AR in the following cycle
367 is equal to the targetAR.
368 In the "dumb" method, the Cv and targetAR keywords are ignored.
369 Here the newp[1] parameter is modified because prior changes have set
370 newp[0] to a different value than oldp[0]. Thus, we should move newp[1] by
371 minimizing the effect on the AR since it is supposedly equal to targetAR. In
372 this simple method, the parameter is just translated.
375 "newp[0] has moved (%.3f -> %.3f), adjusting the position of newp[1]" %
377 return oldp[1] - (oldp[0] - newp[0])
380 def update_bad_dumb(newp, oldp, ind, targetAR=0.4, scale=0.1, **kwargs):
381 """Here the old parameters are oldp[0] and oldp[1], and the starting point
382 is newp[0]. We should modify newp[1] so that the AR in the following cycle
383 is equal to the targetAR.
384 In the "dumb" method, the Cv keyword is ignored. Here the newp[1]
385 parameter is modified to follow the possible translation of newp[0] by
386 calling update_good_dumb, and then newp[1] is added or substracted scale% of
387 (oldp[1] - oldp[0]) to adjust to targetAR.
390 if newp[0] != oldp[0]:
391 newp[1] = update_good_dumb(newp, oldp)
392 if targetAR > sum(ind) / float(len(ind)):
393 prdb(
"""target AR is higher than expected, decreasing newp[1]""")
394 newp[1] -= scale * (oldp[1] - oldp[0])
396 prdb(
"""target AR is lower than expected, increasing newp[1]""")
397 newp[1] += scale * (oldp[1] - oldp[0])
401 def update_any_cv_step(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
402 """here we use the average AR formula of two gaussians to get newp[1] as a
403 function of newp[1], knowing the targetAR and estimating the Cv. If targetAR
404 is negative, consider that mean(ind) equals the target AR and skip any
405 calculation in the case that oldp[0] equals newp[0].
406 step: suppose the heat capacity is stepwise constant, i.e. use the heat
407 capacity at position newp[0] as an estimate of the mean of the heat capacity
408 between newp[0] and newp[1]. This does not require any self-consistent loop.
413 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
416 targetAR = sum(ind) / float(len(ind))
418 Y = sqrt(2 * kB) * float(erfinv(1 - targetAR))
420 raise ValueError(
"""targetAR too small for this approximate method, use
421 the full self-consistent method instead.""")
422 return newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
425 def update_any_cv_sc(newp, oldp, ind, targetAR=0.4, Cv=None,
426 tol=1e-6, maxiter=10000):
427 """self-consistent solver version"""
431 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
434 targetAR = sum(ind) / float(len(ind))
436 Y = sqrt(2 * kB) * float(erfinv(1 - targetAR))
438 raise ValueError(
"""targetAR too small for this approximate method, use
439 the full self-consistent method instead.""")
440 targetp = newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
441 for i
in range(maxiter):
442 cv = Cv.mean(newp[0], targetp)
443 (oldtargetp, targetp) = (
444 targetp, newp[0] * (cv + Y * sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2))
445 if abs(targetp - oldtargetp) <= tol:
449 raise ValueError(
"""targetAR too small for this approximate method, use
450 the full self-consistent method instead.""")
452 raise ValueError(
"""something unexpected happened""")
454 prdb(
"""Warning: unable to converge the self-consistent after %d
455 iterations and a tolerance of %f. Change the method or decrease the
456 tolerance!""" % (maxiter, tol))
457 prdb(
"converged after %d iterations and a tolerance of %f for x=%f" %
462 def update_any_cv_scfull(newp, oldp, ind, targetAR=0.4, Cv=None,
463 tol=1e-6, maxiter=10000):
464 """self-consistent solver version, on the exact average AR equation"""
469 'u21 <- function(t1,t2) { integrate(Vectorize(cvspline),t1,t2)$value }')
470 _rb21 = r(
'b21 <- function(t1,t2) { 1./(kB*t2) - 1./(kB*t1) }')
471 _rsigma2 = r(
'sigma2 <- function(t) {cvspline(t)*kB*t**2}')
472 _rovboltz = r(
'ovboltz <- function(t1,t2) {\
474 u21(t1,t2)/sqrt(2*(sigma2(t1)+sigma2(t2))))\
475 + exp(b21(t1,t2)*(u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2))/2))\
476 * (1+erf((u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2)))/(sqrt(2*(sigma2(t1)+sigma2(t2))))))\
479 'rootfn <- function(t2) {ovboltz(%f,t2)-%f}' %
483 if oldp[1] > oldp[0]:
488 while _rrootfn(tmp)[0] >= 0:
490 tmp += (oldp[1] - oldp[0])
492 raise RuntimeError(
"heat capacity goes negative")
494 raise RuntimeError(
'could not find zero of function!')
498 'uniroot(rootfn,c(%f,%f),f.lower = %f, f.upper = %f, tol = %f, maxiter = %d)' % (newp[0], tmp,
499 1 - targetAR, -targetAR, tol, maxiter))
501 "self-consistent solver converged after %s iterations and an estimated precision of %s " %
502 (_runiroot[2][0], _runiroot[3][0]))
508 _runiroot[0][0])[0]])
509 return _runiroot[0][0]
512 def update_any_cv_nr(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
513 """newton-raphson solver version"""
516 raise NotImplementedError
521 def are_equal_to_targetAR(
526 """here, all indicators have same average, we want to know if it is equal to
531 deviations = sorted([(abs(sum(ind) / float(len(ind)) - targetAR), ind)
532 for pos, ind
in enumerate(indicators)])
533 deviant = deviations[-1]
536 if method ==
"ttest":
539 elif method ==
"binom":
542 raise NotImplementedError
545 test, pval = ttest(deviant[1], targetAR)
547 if abs(targetAR - sum(deviant[1]) / len(deviant[1])) > EPSILON:
557 def are_stationnary(indicators, alpha=0.05, method="anova"):
558 """test on the stationarity of the observations (block analysis). Done so by
559 launching an anova on the difference between the two halves of each observations.
562 if method ==
"kruskal":
567 tmp = array(indicators)
568 blocklen = len(indicators[0]) / 2
569 block = tmp[:, :blocklen] - tmp[:, blocklen:2 * blocklen]
570 if test(*block)[
'pval'] < alpha:
576 def are_equal(indicators, targetAR=0.4, alpha=0.05,
577 method=
"anova", varMethod=
"skip", power=0.8):
578 """Perform a one-way ANOVA or kruskal-wallis test on the indicators set,
579 and return True if one cannot exclude with risk alpha that the indicators
580 AR are different (i.e. True = all means are equal). Also performs a test
581 on the variance (they must be equal).
584 if min(targetAR, 1 - targetAR) * len(indicators[0]) <= 5
and \
585 (varMethod ==
"bartlett" or method ==
"anova"):
586 prdb(
"Warning: normal approximation to the binomial does not hold!")
589 if varMethod ==
"skip":
592 if varMethod ==
"bartlett":
593 pval = bartlett(*indicators)[1]
594 elif varMethod ==
"fligner":
595 pval = killeen(*indicators)[1]
597 raise NotImplementedError(
598 "variance testing method unknown: %s" %
601 prdb(
"Warning: performing mean test with unequal variances.")
603 if method ==
"kruskal":
608 tr = test(*indicators)
611 tr[
'result'] = tr[
'pval'] >= alpha
623 def find_good_ARs(indicators, targetAR=0.4, alpha=0.05, method="binom"):
624 """perform one-sample t-tests on each of the data sets, and return
625 a tuple of bool of size N-1, False if AR i is not equal to targetAR
630 means = sorted([(sum(ind) / float(len(ind)), pos, ind)
631 for pos, ind
in enumerate(indicators)])
634 if method ==
"ttest":
637 elif method ==
"binom":
640 raise NotImplementedError
644 prdb(
"starting left")
645 for (i, (mean, pos, ind))
in enumerate(means):
647 "performing t-test on couple %d having average AR %f, position %d" %
650 test, pval = ttest(ind, targetAR)
652 if abs(targetAR - mean) > EPSILON:
658 isGoodTuple.append((pos,
False))
663 prdb(
"starting right")
664 for (i, (mean, pos, ind))
in enumerate(reversed(means)):
665 prdb(
"performing t-test on couple %d having average AR %f, position %d"
666 % (pos, mean, len(means) - 1 - i))
667 if ttest(ind, targetAR)[1] < alpha:
669 isGoodTuple.append((pos,
False))
671 goodstop = len(means) - 1 - i
675 if len(isGoodTuple) > len(indicators):
676 return tuple([
False] * len(indicators))
678 elif len(isGoodTuple) == 0:
679 return tuple([
True] * len(indicators))
682 isGoodTuple.extend([(means[i][1],
True)
for i
in
683 range(goodstart, goodstop + 1)])
685 return tuple([tup[1]
for tup
in isGoodTuple])
690 def mean_first_passage_times(
695 """compute mean first passage times as suggested in
696 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
698 use_avgAR : if a list of average ARs is given computes everything from
699 average AR; if it is False, compute by direct counting of events.
702 If use_avgAR == False:
703 tau0, tauN, chose_N, times0, timesN
705 tau0, tauN, None, None, None
706 tau0[i]: average time to go from replica 0 to replica i
707 tauN[i]: average time to go from replica N to replica i
708 times0 and timesN are the corresponding lists of single events.
712 from numpy
import array, zeros
713 from pprint
import pprint, pformat
714 replicanums = array(replicanums_ori)[:, start::subs]
722 for state
in range(1, N):
723 tau0[state] = tau0[state - 1] + \
724 state / (float(use_avgAR[state - 1]))
725 for state
in reversed(range(1, N)):
726 tauN[state - 1] = tauN[state] \
727 + (N - (state - 1)) / (float(use_avgAR[state - 1]))
729 return tau0, tauN,
None,
None,
None
733 if sys.version_info[0] >= 3:
736 from itertools
import izip
741 store0 = zeros((N, N), dtype=bool)
742 last0 = [0
for i
in range(N)]
743 already0 = [
False for i
in range(N)]
744 times0 = [[]
for i
in range(N)]
745 storeN = zeros((N, N), dtype=bool)
746 lastN = [0
for i
in range(N)]
747 alreadyN = [
False for i
in range(N)]
748 timesN = [[]
for i
in range(N)]
751 for time, frame
in enumerate(izip(*replicanums)):
753 if not already0[frame[0]]:
754 last0[frame[0]] = time
755 store0[frame[0], :] =
True
756 already0[frame[0]] =
True
758 if not alreadyN[frame[-1]]:
759 lastN[frame[-1]] = time
760 storeN[frame[-1], :] =
True
761 alreadyN[frame[-1]] =
True
763 already0[frame[1]] =
False
764 alreadyN[frame[-2]] =
False
766 for state, rep
in enumerate(frame):
767 if store0[rep, state]:
769 store0[rep, state] =
False
771 times0[state].append(time - last0[rep])
772 if storeN[rep, state]:
774 storeN[rep, state] =
False
776 timesN[state].append(time - lastN[rep])
778 times = [[]
for i
in range(N)]
779 chose_N = [len(timesN[state]) > len(times0[state])
for state
in
781 for state
in range(N):
782 tauN[state] = sum(timesN[state]) / float(len(timesN[state]))
783 tau0[state] = sum(times0[state]) / float(len(times0[state]))
786 return tau0, tauN, chose_N, times0, timesN
789 def compute_effective_fraction(tau0, tauN, chose_N):
790 """input: tau0, tauN, chose_N
791 output: effective fraction f(T) (P_up(n)) as introduced in
792 Trebst S, Troyer M, Hansmann UHE, J Chem Phys *124* 174903 (2006).
794 Nadler W, Hansmann UHE, Phys Rev E *75* 026109 (2007)
795 and whose calculation is enhanced in
796 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
797 the ideal value of f(n) should be 1 - n/N
805 nstar = N - sum([int(a)
for a
in chose_N]) - 1
807 prdb(
"n* = %d" % nstar)
811 for state
in range(2, nstar + 1):
812 h0[state] = h0[state - 1] + \
813 (tau0[state] - tau0[state - 1]) / float(state)
817 for state
in reversed(range(nstar, N - 1)):
818 hN[state] = hN[state + 1] + \
819 (tauN[state] - tauN[state + 1]) / float(N - state)
824 for n
in range(1, nstar + 1):
825 pup[n] = 1 - h0[n] / (h0[nstar] + hN[nstar])
826 for n
in range(nstar + 1, N):
827 pup[n] = hN[n] / (h0[nstar] + hN[nstar])
832 def spline_diffusivity(pup, params):
833 """spline interpolation of diffusivity: D = 1/(df/dT * heta)
835 from numpy
import linspace
836 robjects.globalenv[
"hetay"] = \
837 robjects.FloatVector(linspace(0, 1, num=len(params)).tolist())
838 robjects.globalenv[
"hetax"] = robjects.FloatVector(params)
839 robjects.globalenv[
"pupx"] = robjects.FloatVector(params)
840 robjects.globalenv[
"pupy"] = robjects.FloatVector(pup)
841 heta = r(
'heta <- splinefun(hetax,hetay,method="monoH.FC")')
842 eff = r(
'eff <- splinefun(pupx,pupy,method="monoH.FC")')
843 diff = r(
'diff <- function(x) {-1/(heta(x,deriv=1)*eff(x,deriv=1))}')
844 return lambda x: diff(x)[0]
848 def compute_indicators(replicanums, subs=1, start=0):
849 """input: replicanums : a list of N lists of size M, where N is the number of
850 states and M is the length of the simulation. Each element is an integer,
851 and corresponds to the label of a replica.
852 output: an indicator function of exchanges (size (N-1)x(M-1)), 1 if exchange and
856 if replicanums[n][m] == replicanums[n + 1][m + 1] \
857 and replicanums[n][m + 1] == replicanums[n + 1][m]:
863 for n
in range(len(replicanums) - 1):
864 indicators.append([exchange(n, m)
865 for m
in range(len(replicanums[n]) - 1)][start::subs])
871 def update_params_nonergodic(pup, params, write_g=False, num=False):
873 from numpy
import linspace
875 g = linear_interpolation(list(zip(pup, params)), 0)
877 d = spline_diffusivity(pup, params)
879 fl.write(
"".join([
"%f %f\n" % (x, g(x))
880 for x
in linspace(0, 1, num=100)]))
882 fl = open(
'diffusivity',
'w')
883 fl.write(
''.join([
"%f %f\n" % (x, d(x))
for x
in
884 linspace(params[0], params[-1], num=100)]))
886 fl = open(
'pup',
'w')
887 fl.write(
"".join([
"%f %f\n" % (i, j)
for (i, j)
in zip(params, pup)]))
891 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=len(params)))]
893 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=num))]
895 newparams[0] = params[0]
896 newparams[-1] = params[-1]
902 indicators, params, isGood, targetAR=0.4, immobilePoint=1,
903 Cv=
None, badMethod=
"dumb", goodMethod=
"dumb", dumb_scale=0.1):
904 """update the parameters according to the isGood tuple and using the
907 newparams = list(params)
909 if immobilePoint != 1:
910 raise NotImplementedError
912 if Cv
is None and (badMethod !=
"dumb" or goodMethod !=
"dumb"):
913 raise RuntimeError(
"""Cv needs to be estimated if using other methods
914 than 'dumb' for updating!""")
916 if goodMethod ==
"dumb":
917 update_good = update_good_dumb
918 elif goodMethod ==
"step":
919 update_good = update_any_cv_step
920 elif goodMethod ==
"sc":
921 update_good = update_any_cv_sc
922 elif goodMethod ==
"scfull":
923 update_good = update_any_cv_scfull
924 elif goodMethod ==
"nr":
925 update_good = update_any_cv_nr
927 raise NotImplementedError(goodMethod)
928 if badMethod ==
"dumb":
929 update_bad = update_bad_dumb
930 elif badMethod ==
"step":
931 update_bad = update_any_cv_step
932 elif badMethod ==
"sc":
933 update_bad = update_any_cv_sc
934 elif badMethod ==
"scfull":
935 update_bad = update_any_cv_scfull
936 elif badMethod ==
"nr":
937 update_bad = update_any_cv_nr
939 raise NotImplementedError(badMethod)
942 for pos
in range(len(params) - 1):
944 newparams[pos + 1] = update_good(
945 newparams[pos:pos + 2], params[pos:pos + 2],
946 indicators[pos], targetAR=targetAR, Cv=Cv)
948 newparams[pos + 1] = update_bad(
949 newparams[pos:pos + 2], params[pos:pos + 2],
950 indicators[pos], targetAR=targetAR, Cv=Cv, scale=dumb_scale)
952 return tuple(newparams)
955 def tune_params_flux(replicanums, params, subs=1, start=0, alpha=0.05,
956 testMethod=
'anova', meanMethod=
'binom', use_avgAR=
False,
957 power=0.8, num=
False):
962 if use_avgAR
is not False:
963 raise NotImplementedError
965 prdb(
"computing mean first passage times")
966 tau0, tauN, chose_N, times0, timesN = mean_first_passage_times(replicanums,
967 subs=subs, start=start, use_avgAR=use_avgAR)
969 prdb(
"average round trip time: %.2f (%d+%d events)" %
970 (tau0[-1] + tauN[0], len(times0[-1]), len(timesN[0])))
971 prdb(
"checking if the parameterset needs to be improved")
976 nstar = N - sum([int(a)
for a
in chose_N]) - 1
980 for n
in range(1, N - 1):
982 reduced.append([i * 2.0 / ((N - n) * (N - n + 1))
985 reduced.append([i * 2.0 / (n * (n + 1))
for i
in times0[n]])
987 anova_result = are_equal(reduced, alpha=alpha, method=testMethod,
989 if (anova_result[
'result']):
990 prdb(
"flux is constant, nothing to do!")
991 min_n = minimum_n(anova_result, alpha)
992 prdb(
'Try to rerun this test with at least %d more samples.' %
994 return (
False, min_n)
998 prdb(
"parameterset not optimal, computing effective fraction")
999 pup = compute_effective_fraction(tau0, tauN, chose_N)
1002 prdb(
"returning new parameterset")
1003 params = update_params_nonergodic(pup, params, num=num)
1005 return (
True, params)
1009 indicators, params, targetAR=0.4, alpha=0.05, immobilePoint=1, CvMethod=
"skip", badMethod=
"dumb", goodMethod=
"dumb",
1010 varMethod=
"skip", testMethod=
"anova", meanMethod=
"binom",
1011 energies=
None, temps=
None, power=0.8, dumb_scale=0.1):
1012 """Tune the replica-exchange parameters and return a new set.
1015 indicators -- an (N-1)x(M-1) table, where entry at position (j,i)
1016 is True if replica j and j+1 performed an exchange between
1018 params -- the current set of N parameters used in the simulation.
1021 targetAR -- the target AR which is wanted for the simulation
1023 alpha -- the type 1 error on the one-way ANOVA and subsequent
1024 t-tests (default: 5%)
1025 immobilePoint -- which replica should keep it's parameter fixed
1026 (default: 1st replica, e.g. 1)
1027 CvMethod -- the heat capacity estimation method (default:
1028 "skip", other options: "mbar", "spline", "constant")
1029 badMethod -- how to correct the (j+1)th parameter if the
1030 acceptance ratio between replicas j and j+1 is off the
1031 target value (default: "dumb", options: "step", "sc", "scfull", "nr")
1032 goodMethod -- how to update the value of the (j+1)th parameter
1033 in the case of a correctly exchanging couple, but if the jth
1034 parameter has been modified (default: "dumb",options: "step",
1035 "sc" self-consistent, "scfull" self-consistent using the exact equation,
1036 "nr" newton-raphson solver for the exact equation)
1037 dumb_scale -- (0.0-1.0) in the "dumb" method, scale wrong temperature
1038 intervals by this amount. (default: 0.1)
1039 testMethod -- how to test for the difference of the means,
1040 either "anova" for a one-way anova, or "kruskal" for a
1041 Kruskal-Wallis one-way non-parametric anova.
1042 meanMethod -- "ttest" for a two-sided one-sample t-test,
1043 "binom" for an exact binomial test of the probability.
1044 varMethod -- how to test for the equality of variances.
1045 "fligner" for the Fligner-Killeen non-parametric test,
1046 "bartlett" for Bartlett's test, "skip" to pass.
1047 energies -- if CvMethod is set to "mbar", the energies of each
1048 state as a function of time are used to estimate the heat capacity.
1049 temps -- the temperatures of the simulations, if estimating with "mbar".
1052 returns a tuple: (bool, params). bool is True if params have
1053 changed, and params is the new set.
1058 prdb(
"performing ANOVA")
1059 anova_result = are_equal(indicators, targetAR, alpha, method=testMethod,
1060 varMethod=varMethod, power=power)
1061 if (anova_result[
'result']
and
1062 are_equal_to_targetAR(indicators, targetAR, alpha, method=meanMethod)):
1063 prdb(
"all means are equal to target AR, nothing to do!")
1064 min_n = minimum_n(anova_result, alpha)
1066 'Try to rerun this test with at least %d more samples.' %
1068 return (
False, min_n)
1069 prdb(
"some means are different, performing t-tests")
1072 isGood = find_good_ARs(indicators, targetAR, alpha, method=meanMethod)
1073 if not (
False in isGood):
1074 prdb(
"""Bad luck: ANOVA says means are not identical, but t-tests
1075 can't find the bad rates...""")
1076 return (
False, params)
1080 prdb(
"performing stationarity test")
1081 if not are_stationnary(indicators, alpha):
1082 prdb(
"Warning: Some simulations are not stationary!")
1086 prdb(
"launching Cv estimation or skipping it")
1087 if CvMethod ==
"skip":
1089 elif CvMethod ==
"interpolate" or CvMethod ==
"constant":
1090 Cv =
CvEstimator(params, indicators=indicators, method=CvMethod)
1091 elif CvMethod ==
"mbar":
1098 raise NotImplementedError(CvMethod)
1101 prdb(
'updating params')
1103 params = update_params(indicators, params, isGood, targetAR=targetAR,
1104 immobilePoint=immobilePoint, Cv=Cv,
1105 badMethod=badMethod, goodMethod=goodMethod, dumb_scale=dumb_scale)
1108 return (
True, params)
1110 if __name__ ==
'__main__':
1113 for i
in range(1, 8):
1114 replicanums.append(tuple(fromfile(
'data/replica-indices/%d.rep' % i,
1115 dtype=int, sep=
'\n')))
1117 prdb(
"replicanums: %dx%d" % (len(replicanums), len(replicanums[0])))
1118 params = tuple(fromfile(
'data/temperatures', sep=
' '))
1121 indicators = compute_indicators(replicanums, subs=1, start=0)
1122 prdb(
"indicators: %dx%d" % (len(indicators), len(indicators[0])))
1123 prdb(
"Exchange rate:")
1124 prdb(array([sum(ind) / float(len(ind))
for ind
in indicators]))
1125 array([sum(ind) / float(len(ind))
for ind
in
1126 indicators]).tofile(
'xchgs', sep=
'\n')
1127 changed, newparams = tune_params(indicators, params, targetAR=0.25,
1128 badMethod=
"dumb", goodMethod=
"dumb", CvMethod=
"skip", testMethod=
"anova", alpha=0.05)
1130 print(
"Parameter set seems optimal.")
1132 if not True in [abs(newparams[i + 1] - newparams[i]) < 1e-3
for i
in range(len(newparams) - 1)]:
1133 array(newparams).tofile(
'data/temperatures', sep=
' ')
1135 print(
"PROBLEM IN NEW PARAMETERSET -> not saved")
1136 print(
"params :", params)
1137 print(
"new params:", newparams)
def estimate_cv_constant
try to guess which constant cv fits best
def estimate_cv_interpolate
interpolate using previous values, by reversing the approximate overlap function
When created, estimates the heat capacity from the energies or from the indicator functions using the...
def mean_interp
estimate the mean of Cv between two points.
def estimate_cv_mbar
use MBAR to get the heat capacity
def get_mbar
returns the point estimate of the first derivative of the energy with respect to the replica exchange...
bool isnan(const T &a)
Return true if a number is NaN.
def get_interp
returns the point estimate of the first derivative of the energy with respect to the replica exchange...