3 from __future__
import print_function
6 This module provides a few methods to improve the efficiency of a
7 replica-exchange simulation by tuning its parameters.
12 import rpy2.robjects
as robjects
14 kB = 1.3806503 * 6.0221415 / 4184.0
28 robjects.globalenv[
"kB"] = kB
29 _rinverf = r(
'invErf <- function(x) {qnorm((1 + x) /2) / sqrt(2)}')
30 _rerf = r(
'erf <- function(x) {2 * pnorm(x * sqrt(2)) - 1}')
44 return _rinvF(x, d1, d2)[0]
47 def spline(xy, mean, method=None):
48 """spline interpolation of (x,y) coordinates. If interpolation goes
49 negative, replace by mean value.
52 robjects.globalenv[
"x"] = robjects.FloatVector(x)
53 robjects.globalenv[
"y"] = robjects.FloatVector(y)
57 r(
'cvsplinenonbounded <- splinefun(x,y)')
59 r(
'cvsplinenonbounded <- splinefun(x,y,method="%s")' % method)
61 'cvspline <- function(x) { tmp = cvsplinenonbounded(x); '
62 'if (tmp>0) {tmp} else {%f}}' %
71 def linear_interpolation(xy, mean):
72 """linear interpolation of (x,y) coordinates. No extrapolation possible.
75 robjects.globalenv[
"x"] = robjects.FloatVector(x)
76 robjects.globalenv[
"y"] = robjects.FloatVector(y)
79 _rinterp = r(
'cvspline <- approxfun(x,y)')
90 """perform anova using R and return statistic, p-value, between and
95 reps = r.rep(0, len(args[0]))
96 weight = robjects.FloatVector(args[0])
97 for i
in range(1, len(args)):
98 reps += r.rep(i, len(args[i]))
99 weight += robjects.FloatVector(args[i])
100 group = r.factor(reps)
101 robjects.globalenv[
"weight"] = weight
102 robjects.globalenv[
"group"] = group
103 lm = r.lm(
"weight ~ group")
107 anova_result = {
'fstat': aov[3][0],
109 'between': aov[2][0],
111 'nsteps': [len(i)
for i
in args],
118 """perform kruskal-wallis rank test"""
122 reps = r.rep(0, len(args[0]))
123 weight = robjects.FloatVector(args[0])
124 for i
in range(1, len(args)):
125 reps += r.rep(i, len(args[i]))
126 weight += robjects.FloatVector(args[i])
127 group = r.factor(reps)
128 aov = r(
'kruskal.test')(group, weight)
130 kruskal_result = {
'fstat': aov[0][0],
132 'nsteps': [len(i)
for i
in args],
135 return kruskal_result
138 def ttest(obs, target):
139 """perform a one-sample two-sided t-test on obs against target mean"""
140 test = r(
't.test')(robjects.IntVector(obs), mu=target)
141 return test[0][0], test[2][0]
144 def binom(obs, target):
145 """perform an exact binomial test on the mean of obs against target"""
148 test = r(
'binom.test')(success, trials, p=target)
149 return test[0][0], test[2][0]
153 """perform bartlett's test on the equality of variances of the
157 group = r.gl(ngroups, nreps)
158 weight = robjects.IntVector(args[0])
160 weight += robjects.IntVector(i)
161 robjects.globalenv[
"weight"] = weight
162 robjects.globalenv[
"group"] = group
163 var = r(
'bartlett.test')(weight, group)
164 return var[0][0], var[2][0]
168 """perform Fligner-Killeen non-parametric test of the variance equality"""
171 group = r.gl(ngroups, nreps)
172 weight = robjects.IntVector(args[0])
174 weight += robjects.IntVector(i)
175 robjects.globalenv[
"weight"] = weight
176 robjects.globalenv[
"group"] = group
177 var = r(
'fligner.test')(weight, group)
178 return var[0][0], var[2][0]
181 def power_test(ar, power=0.8, alpha=0.05):
182 """perform an anova power test and return
183 - the power of the test with this input data
184 - the number of points that would be needed to achieve a default
186 ar: the output of anova()
188 result = r(
'power.anova.test')(groups=ar[
'nreps'], n=min(ar[
'nsteps']),
189 between=ar[
'between'], within=ar[
'within'],
191 prdb(
'the power of this anova was: %.3f' % result[5][0])
192 result = r(
'power.anova.test')(groups=ar[
'nreps'],
193 between=ar[
'between'], within=ar[
'within'],
194 sig=alpha, pow=power)
195 prdb(
'To have a power of %.3f, there should be at least %d exchange '
196 'attempts.' % (power, result[1][0]))
200 def minimum_n(ar, alpha=0.05):
201 """This routine tries to return an estimate of the additional number of
202 exchange trials that could lead to a positive result of the anova (e.g.
203 the average ARs are not the same). It is still very crude. It also assumes
204 that a one-way anova was made.
205 ar: the output of anova()
209 nsteps = ar[
'nsteps']
215 return nsteps * (numpy.sqrt(Finv(1 - alpha, nreps - 1,
216 nreps * (nsteps - 1)) / fstat) - 1)
222 """When created, estimates the heat capacity from the energies or from the
223 indicator functions using the specified method. Two methods are then
224 available to guess the heat capacity at a given parameter value: get()
225 returns a single point, and mean() returns the linear average between two
229 def __init__(self, params, energies=None, indicators=None,
230 method=
"constant", temps=
None, write_cv=
False):
232 self.__initialized =
False
236 if method ==
"interpolate":
240 elif method ==
"constant":
242 self.get =
lambda p: self.__cv
243 self.mean =
lambda p1, p2: self.__cv
244 self.__cvfun = self.get
245 r(
'cvspline <- function(x) {%f}' % self.__cv)
246 elif method ==
"mbar":
249 self.mean = self.mean_mbar
251 raise NotImplementedError(method)
253 self.__initialized =
True
258 fl.write(
"".join([
"%f %f\n" % (x, self.get(x))
259 for x
in numpy.linspace(params[0] / 2, 2 * params[-1])]))
263 """interpolate using previous values, by reversing the approximate
266 if self.__initialized:
268 if len(indicators) != len(params) - 1:
270 "the length of indicators and params does not match!")
271 if params != tuple(sorted(params)):
272 raise NotImplementedError(
273 "unable to work on parameters that do not change "
276 prdb(
"storing params and means")
277 self.__params = params
278 self.__pmeans = [(params[i] + params[i + 1]) / 2.
279 for i
in range(len(params) - 1)]
280 prdb(
"computing __cv")
281 for i, ind
in enumerate(indicators):
282 mean = sum(ind) / float(len(ind))
283 Y2 = 2 * kB * erfinv(1 - mean) ** 2
288 (p1 ** 2 + p2 ** 2) * float(Y2) / (p2 - p1) ** 2))
291 self.__cvmean = sum([i[1]
for i
in self.__cv]) / float(len(self.__cv))
292 if self.__cvmean < 0:
293 raise ValueError(
"Cv mean is negative!")
294 self.__cvfun = spline(self.__cv, self.__cvmean)
298 """try to guess which constant cv fits best"""
299 if self.__initialized:
302 self.__cv = self.__cvmean
305 def needs_init(self):
306 if not self.__initialized:
307 raise RuntimeError(
"Class was not initialized correctly!")
310 "use MBAR to get the heat capacity"
311 raise NotImplementedError(
"estimate_cv_mbar")
313 def _isinbounds(self, p, params):
314 """returns True if p is within params, else false. the params list
315 must be sorted ascendingly."""
316 if p < params[0] - EPSILON
or p > params[-1] + EPSILON:
323 def _interpolate(self, xval, xlist):
324 """return interpolation of Cv at point xval, and return the average
325 instead if this value is negative.
327 self._isinbounds(xval, xlist)
328 val = self.__cvfun(xval)
335 """returns the point estimate of the first derivative of the energy
336 with respect to the replica exchange parameter (usually T or q).
337 This version assumes that the means of cv are given.
340 return self._interpolate(param, self.__pmeans)
343 """returns the point estimate of the first derivative of the energy
344 with respect to the replica exchange parameter (usually T or q).
345 This version assumes that the values of cv are given.
348 return self._interpolate(param, self.__params)
351 """estimate the mean of Cv between two points. Here the means were
355 return self._interpolate((pa + pb) / 2., self.__pmeans)
357 def mean_mbar(self, pa, pb):
364 def update_good_dumb(newp, oldp, *args, **kwargs):
365 """Here the old parameters are oldp[0] and oldp[1], and the starting point
366 is newp[0]. We should modify newp[1] so that the AR in the following cycle
367 is equal to the targetAR.
368 In the "dumb" method, the Cv and targetAR keywords are ignored.
369 Here the newp[1] parameter is modified because prior changes have set
370 newp[0] to a different value than oldp[0]. Thus, we should move newp[1] by
371 minimizing the effect on the AR since it is supposedly equal to targetAR.
372 In this simple method, the parameter is just translated.
375 "newp[0] has moved (%.3f -> %.3f), adjusting the position of newp[1]" %
377 return oldp[1] - (oldp[0] - newp[0])
380 def update_bad_dumb(newp, oldp, ind, targetAR=0.4, scale=0.1, **kwargs):
381 """Here the old parameters are oldp[0] and oldp[1], and the starting point
382 is newp[0]. We should modify newp[1] so that the AR in the following cycle
383 is equal to the targetAR.
384 In the "dumb" method, the Cv keyword is ignored. Here the newp[1]
385 parameter is modified to follow the possible translation of newp[0] by
386 calling update_good_dumb, and then newp[1] is added or subtracted scale% of
387 (oldp[1] - oldp[0]) to adjust to targetAR.
390 if newp[0] != oldp[0]:
391 newp[1] = update_good_dumb(newp, oldp)
392 if targetAR > sum(ind) / float(len(ind)):
393 prdb(
"""target AR is higher than expected, decreasing newp[1]""")
394 newp[1] -= scale * (oldp[1] - oldp[0])
396 prdb(
"""target AR is lower than expected, increasing newp[1]""")
397 newp[1] += scale * (oldp[1] - oldp[0])
401 def update_any_cv_step(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
402 """here we use the average AR formula of two gaussians to get newp[1] as a
403 function of newp[1], knowing the targetAR and estimating the Cv. If
404 targetAR is negative, consider that mean(ind) equals the target AR and
405 skip any calculation in the case that oldp[0] equals newp[0].
406 step: suppose the heat capacity is stepwise constant, i.e. use the heat
407 capacity at position newp[0] as an estimate of the mean of the heat
408 capacity between newp[0] and newp[1]. This does not require any
409 self-consistent loop.
414 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
417 targetAR = sum(ind) / float(len(ind))
419 Y = numpy.sqrt(2 * kB) * float(erfinv(1 - targetAR))
421 raise ValueError(
"""targetAR too small for this approximate method, use
422 the full self-consistent method instead.""")
423 return newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
426 def update_any_cv_sc(newp, oldp, ind, targetAR=0.4, Cv=None,
427 tol=1e-6, maxiter=10000):
428 """self-consistent solver version"""
432 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
435 targetAR = sum(ind) / float(len(ind))
437 Y = numpy.sqrt(2 * kB) * float(erfinv(1 - targetAR))
439 raise ValueError(
"""targetAR too small for this approximate method, use
440 the full self-consistent method instead.""")
441 targetp = newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2)) / (cv - Y ** 2)
442 for i
in range(maxiter):
443 cv = Cv.mean(newp[0], targetp)
444 (oldtargetp, targetp) = (
445 targetp, newp[0] * (cv + Y * numpy.sqrt(2 * cv - Y ** 2))
447 if abs(targetp - oldtargetp) <= tol:
449 if numpy.isnan(targetp):
452 "targetAR too small for this approximate method, use the "
453 "full self-consistent method instead.")
455 raise ValueError(
"""something unexpected happened""")
457 prdb(
"""Warning: unable to converge the self-consistent after %d
458 iterations and a tolerance of %f. Change the method or decrease the
459 tolerance!""" % (maxiter, tol))
460 prdb(
"converged after %d iterations and a tolerance of %f for x=%f" %
465 def update_any_cv_scfull(newp, oldp, ind, targetAR=0.4, Cv=None,
466 tol=1e-6, maxiter=10000):
467 """self-consistent solver version, on the exact average AR equation"""
470 _ = r(
'u21 <- function(t1,t2) { integrate(Vectorize(cvspline),'
472 _ = r(
'b21 <- function(t1,t2) { 1./(kB*t2) - 1./(kB*t1) }')
473 _ = r(
'sigma2 <- function(t) {cvspline(t)*kB*t**2}')
474 _rovboltz = r(
'ovboltz <- function(t1,t2) {\
476 u21(t1,t2)/sqrt(2*(sigma2(t1)+sigma2(t2))))\
477 + exp(b21(t1,t2)*(u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2))/2))\
478 * (1+erf((u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2)))\
479 /(sqrt(2*(sigma2(t1)+sigma2(t2))))))\
482 'rootfn <- function(t2) {ovboltz(%f,t2)-%f}' %
486 if oldp[1] > oldp[0]:
491 while _rrootfn(tmp)[0] >= 0:
493 tmp += (oldp[1] - oldp[0])
495 raise RuntimeError(
"heat capacity goes negative")
497 raise RuntimeError(
'could not find zero of function!')
500 _runiroot = r(
'uniroot(rootfn,c(%f,%f),f.lower = %f, f.upper = %f, '
501 'tol = %f, maxiter = %d)'
502 % (newp[0], tmp, 1 - targetAR, -targetAR, tol, maxiter))
503 prdb(
"self-consistent solver converged after %s iterations and an "
504 "estimated precision of %s " % (_runiroot[2][0], _runiroot[3][0]))
510 _runiroot[0][0])[0]])
511 return _runiroot[0][0]
514 def update_any_cv_nr(newp, oldp, ind, targetAR=0.4, Cv=None, **kwargs):
515 """newton-raphson solver version"""
518 raise NotImplementedError
523 def are_equal_to_targetAR(
528 """here, all indicators have same average, we want to know if it is
533 deviations = sorted([(abs(sum(ind) / float(len(ind)) - targetAR), ind)
534 for pos, ind
in enumerate(indicators)])
535 deviant = deviations[-1]
538 if method ==
"ttest":
541 elif method ==
"binom":
544 raise NotImplementedError
547 test, pval = our_ttest(deviant[1], targetAR)
549 if abs(targetAR - sum(deviant[1]) / len(deviant[1])) > EPSILON:
559 def are_stationnary(indicators, alpha=0.05, method="anova"):
560 """test on the stationarity of the observations (block analysis). Done
561 so by launching an anova on the difference between the two halves of
565 if method ==
"kruskal":
570 tmp = numpy.array(indicators)
571 blocklen = len(indicators[0]) / 2
572 block = tmp[:, :blocklen] - tmp[:, blocklen:2 * blocklen]
573 if test(*block)[
'pval'] < alpha:
579 def are_equal(indicators, targetAR=0.4, alpha=0.05,
580 method=
"anova", varMethod=
"skip", power=0.8):
581 """Perform a one-way ANOVA or kruskal-wallis test on the indicators set,
582 and return True if one cannot exclude with risk alpha that the indicators
583 AR are different (i.e. True = all means are equal). Also performs a test
584 on the variance (they must be equal).
587 if min(targetAR, 1 - targetAR) * len(indicators[0]) <= 5
and \
588 (varMethod ==
"bartlett" or method ==
"anova"):
589 prdb(
"Warning: normal approximation to the binomial does not hold!")
592 if varMethod ==
"skip":
595 if varMethod ==
"bartlett":
596 pval = bartlett(*indicators)[1]
597 elif varMethod ==
"fligner":
598 pval = fligner(*indicators)[1]
600 raise NotImplementedError(
601 "variance testing method unknown: %s" %
604 prdb(
"Warning: performing mean test with unequal variances.")
606 if method ==
"kruskal":
611 tr = test(*indicators)
614 tr[
'result'] = tr[
'pval'] >= alpha
619 def find_good_ARs(indicators, targetAR=0.4, alpha=0.05, method="binom"):
620 """perform one-sample t-tests on each of the data sets, and return
621 a tuple of bool of size N-1, False if AR i is not equal to targetAR
626 means = sorted([(sum(ind) / float(len(ind)), pos, ind)
627 for pos, ind
in enumerate(indicators)])
630 if method ==
"ttest":
633 elif method ==
"binom":
636 raise NotImplementedError
640 prdb(
"starting left")
641 for (i, (mean, pos, ind))
in enumerate(means):
642 prdb(
"performing t-test on couple %d having average AR %f, "
643 "position %d" % (pos, mean, i))
645 test, pval = our_ttest(ind, targetAR)
647 if abs(targetAR - mean) > EPSILON:
653 isGoodTuple.append((pos,
False))
658 prdb(
"starting right")
659 for (i, (mean, pos, ind))
in enumerate(reversed(means)):
660 prdb(
"performing t-test on couple %d having average AR %f, position %d"
661 % (pos, mean, len(means) - 1 - i))
662 if our_ttest(ind, targetAR)[1] < alpha:
664 isGoodTuple.append((pos,
False))
666 goodstop = len(means) - 1 - i
670 if len(isGoodTuple) > len(indicators):
671 return tuple([
False] * len(indicators))
673 elif len(isGoodTuple) == 0:
674 return tuple([
True] * len(indicators))
677 isGoodTuple.extend([(means[i][1],
True)
for i
in
678 range(goodstart, goodstop + 1)])
680 return tuple([tup[1]
for tup
in isGoodTuple])
685 def mean_first_passage_times(
690 """compute mean first passage times as suggested in
691 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
693 use_avgAR : if a list of average ARs is given computes everything from
694 average AR; if it is False, compute by direct counting of events.
697 If use_avgAR == False:
698 tau0, tauN, chose_N, times0, timesN
700 tau0, tauN, None, None, None
701 tau0[i]: average time to go from replica 0 to replica i
702 tauN[i]: average time to go from replica N to replica i
703 times0 and timesN are the corresponding lists of single events.
707 from numpy
import array, zeros
708 replicanums = array(replicanums_ori)[:, start::subs]
716 for state
in range(1, N):
717 tau0[state] = tau0[state - 1] + \
718 state / (float(use_avgAR[state - 1]))
719 for state
in reversed(range(1, N)):
720 tauN[state - 1] = tauN[state] \
721 + (N - (state - 1)) / (float(use_avgAR[state - 1]))
723 return tau0, tauN,
None,
None,
None
727 if sys.version_info[0] >= 3:
730 from itertools
import izip
735 store0 = zeros((N, N), dtype=bool)
736 last0 = [0
for i
in range(N)]
737 already0 = [
False for i
in range(N)]
738 times0 = [[]
for i
in range(N)]
739 storeN = zeros((N, N), dtype=bool)
740 lastN = [0
for i
in range(N)]
741 alreadyN = [
False for i
in range(N)]
742 timesN = [[]
for i
in range(N)]
745 for time, frame
in enumerate(izip(*replicanums)):
747 if not already0[frame[0]]:
748 last0[frame[0]] = time
749 store0[frame[0], :] =
True
750 already0[frame[0]] =
True
752 if not alreadyN[frame[-1]]:
753 lastN[frame[-1]] = time
754 storeN[frame[-1], :] =
True
755 alreadyN[frame[-1]] =
True
757 already0[frame[1]] =
False
758 alreadyN[frame[-2]] =
False
760 for state, rep
in enumerate(frame):
761 if store0[rep, state]:
763 store0[rep, state] =
False
765 times0[state].append(time - last0[rep])
766 if storeN[rep, state]:
768 storeN[rep, state] =
False
770 timesN[state].append(time - lastN[rep])
773 chose_N = [len(timesN[state]) > len(times0[state])
for state
in
775 for state
in range(N):
776 tauN[state] = sum(timesN[state]) / float(len(timesN[state]))
777 tau0[state] = sum(times0[state]) / float(len(times0[state]))
780 return tau0, tauN, chose_N, times0, timesN
783 def compute_effective_fraction(tau0, tauN, chose_N):
784 """input: tau0, tauN, chose_N
785 output: effective fraction f(T) (P_up(n)) as introduced in
786 Trebst S, Troyer M, Hansmann UHE, J Chem Phys *124* 174903 (2006).
788 Nadler W, Hansmann UHE, Phys Rev E *75* 026109 (2007)
789 and whose calculation is enhanced in
790 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
791 the ideal value of f(n) should be 1 - n/N
799 nstar = N - sum([int(a)
for a
in chose_N]) - 1
801 prdb(
"n* = %d" % nstar)
805 for state
in range(2, nstar + 1):
806 h0[state] = h0[state - 1] + \
807 (tau0[state] - tau0[state - 1]) / float(state)
811 for state
in reversed(range(nstar, N - 1)):
812 hN[state] = hN[state + 1] + \
813 (tauN[state] - tauN[state + 1]) / float(N - state)
818 for n
in range(1, nstar + 1):
819 pup[n] = 1 - h0[n] / (h0[nstar] + hN[nstar])
820 for n
in range(nstar + 1, N):
821 pup[n] = hN[n] / (h0[nstar] + hN[nstar])
826 def spline_diffusivity(pup, params):
827 """spline interpolation of diffusivity: D = 1/(df/dT * heta)
829 from numpy
import linspace
830 robjects.globalenv[
"hetay"] = \
831 robjects.FloatVector(linspace(0, 1, num=len(params)).tolist())
832 robjects.globalenv[
"hetax"] = robjects.FloatVector(params)
833 robjects.globalenv[
"pupx"] = robjects.FloatVector(params)
834 robjects.globalenv[
"pupy"] = robjects.FloatVector(pup)
835 _ = r(
'heta <- splinefun(hetax,hetay,method="monoH.FC")')
836 _ = r(
'eff <- splinefun(pupx,pupy,method="monoH.FC")')
837 diff = r(
'diff <- function(x) {-1/(heta(x,deriv=1)*eff(x,deriv=1))}')
838 return lambda x: diff(x)[0]
842 def compute_indicators(replicanums, subs=1, start=0):
843 """input: replicanums : a list of N lists of size M, where N is the number
844 of states and M is the length of the simulation. Each element is an
845 integer, and corresponds to the label of a replica.
846 output: an indicator function of exchanges (size (N-1)x(M-1)), 1 if
847 exchange and 0 if not.
850 if replicanums[n][m] == replicanums[n + 1][m + 1] \
851 and replicanums[n][m + 1] == replicanums[n + 1][m]:
857 for n
in range(len(replicanums) - 1):
860 for m
in range(len(replicanums[n]) - 1)][start::subs])
866 def update_params_nonergodic(pup, params, write_g=False, num=False):
868 from numpy
import linspace
870 g = linear_interpolation(list(zip(pup, params)), 0)
872 d = spline_diffusivity(pup, params)
874 fl.write(
"".join([
"%f %f\n" % (x, g(x))
875 for x
in linspace(0, 1, num=100)]))
877 fl = open(
'diffusivity',
'w')
878 fl.write(
''.join([
"%f %f\n" % (x, d(x))
for x
in
879 linspace(params[0], params[-1], num=100)]))
881 fl = open(
'pup',
'w')
882 fl.write(
"".join([
"%f %f\n" % (i, j)
for (i, j)
in zip(params, pup)]))
886 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=len(params)))]
888 newparams = [g(i)
for i
in reversed(linspace(0, 1, num=num))]
890 newparams[0] = params[0]
891 newparams[-1] = params[-1]
897 indicators, params, isGood, targetAR=0.4, immobilePoint=1,
898 Cv=
None, badMethod=
"dumb", goodMethod=
"dumb", dumb_scale=0.1):
899 """update the parameters according to the isGood tuple and using the
902 newparams = list(params)
904 if immobilePoint != 1:
905 raise NotImplementedError
907 if Cv
is None and (badMethod !=
"dumb" or goodMethod !=
"dumb"):
908 raise RuntimeError(
"""Cv needs to be estimated if using other methods
909 than 'dumb' for updating!""")
911 if goodMethod ==
"dumb":
912 update_good = update_good_dumb
913 elif goodMethod ==
"step":
914 update_good = update_any_cv_step
915 elif goodMethod ==
"sc":
916 update_good = update_any_cv_sc
917 elif goodMethod ==
"scfull":
918 update_good = update_any_cv_scfull
919 elif goodMethod ==
"nr":
920 update_good = update_any_cv_nr
922 raise NotImplementedError(goodMethod)
923 if badMethod ==
"dumb":
924 update_bad = update_bad_dumb
925 elif badMethod ==
"step":
926 update_bad = update_any_cv_step
927 elif badMethod ==
"sc":
928 update_bad = update_any_cv_sc
929 elif badMethod ==
"scfull":
930 update_bad = update_any_cv_scfull
931 elif badMethod ==
"nr":
932 update_bad = update_any_cv_nr
934 raise NotImplementedError(badMethod)
937 for pos
in range(len(params) - 1):
939 newparams[pos + 1] = update_good(
940 newparams[pos:pos + 2], params[pos:pos + 2],
941 indicators[pos], targetAR=targetAR, Cv=Cv)
943 newparams[pos + 1] = update_bad(
944 newparams[pos:pos + 2], params[pos:pos + 2],
945 indicators[pos], targetAR=targetAR, Cv=Cv, scale=dumb_scale)
947 return tuple(newparams)
950 def tune_params_flux(replicanums, params, subs=1, start=0, alpha=0.05,
951 testMethod=
'anova', meanMethod=
'binom', use_avgAR=
False,
952 power=0.8, num=
False):
957 if use_avgAR
is not False:
958 raise NotImplementedError
960 prdb(
"computing mean first passage times")
961 tau0, tauN, chose_N, times0, timesN = mean_first_passage_times(
962 replicanums, subs=subs, start=start, use_avgAR=use_avgAR)
964 prdb(
"average round trip time: %.2f (%d+%d events)" %
965 (tau0[-1] + tauN[0], len(times0[-1]), len(timesN[0])))
966 prdb(
"checking if the parameterset needs to be improved")
971 nstar = N - sum([int(a)
for a
in chose_N]) - 1
975 for n
in range(1, N - 1):
977 reduced.append([i * 2.0 / ((N - n) * (N - n + 1))
980 reduced.append([i * 2.0 / (n * (n + 1))
for i
in times0[n]])
982 anova_result = are_equal(reduced, alpha=alpha, method=testMethod,
984 if (anova_result[
'result']):
985 prdb(
"flux is constant, nothing to do!")
986 min_n = minimum_n(anova_result, alpha)
987 prdb(
'Try to rerun this test with at least %d more samples.' %
989 return (
False, min_n)
993 prdb(
"parameterset not optimal, computing effective fraction")
994 pup = compute_effective_fraction(tau0, tauN, chose_N)
997 prdb(
"returning new parameterset")
998 params = update_params_nonergodic(pup, params, num=num)
1000 return (
True, params)
1003 def tune_params_ar(indicators, params, targetAR=0.4, alpha=0.05,
1004 immobilePoint=1, CvMethod=
"skip", badMethod=
"dumb",
1005 goodMethod=
"dumb", varMethod=
"skip", testMethod=
"anova",
1006 meanMethod=
"binom", energies=
None, temps=
None, power=0.8,
1008 """Tune the replica-exchange parameters and return a new set.
1011 indicators -- an (N-1)x(M-1) table, where entry at position (j,i)
1012 is True if replica j and j+1 performed an exchange between
1014 params -- the current set of N parameters used in the simulation.
1017 targetAR -- the target AR which is wanted for the simulation
1019 alpha -- the type 1 error on the one-way ANOVA and subsequent
1020 t-tests (default: 5%)
1021 immobilePoint -- which replica should keep it's parameter fixed
1022 (default: 1st replica, e.g. 1)
1023 CvMethod -- the heat capacity estimation method (default:
1024 "skip", other options: "mbar", "spline", "constant")
1025 badMethod -- how to correct the (j+1)th parameter if the
1026 acceptance ratio between replicas j and j+1 is off the
1027 target value (default: "dumb", options: "step", "sc",
1029 goodMethod -- how to update the value of the (j+1)th parameter
1030 in the case of a correctly exchanging couple, but if the jth
1031 parameter has been modified (default: "dumb",options: "step",
1032 "sc" self-consistent, "scfull" self-consistent using the exact
1033 equation, "nr" newton-raphson solver for the exact equation)
1034 dumb_scale -- (0.0-1.0) in the "dumb" method, scale wrong temperature
1035 intervals by this amount. (default: 0.1)
1036 testMethod -- how to test for the difference of the means,
1037 either "anova" for a one-way anova, or "kruskal" for a
1038 Kruskal-Wallis one-way non-parametric anova.
1039 meanMethod -- "ttest" for a two-sided one-sample t-test,
1040 "binom" for an exact binomial test of the probability.
1041 varMethod -- how to test for the equality of variances.
1042 "fligner" for the Fligner-Killeen non-parametric test,
1043 "bartlett" for Bartlett's test, "skip" to pass.
1044 energies -- if CvMethod is set to "mbar", the energies of each
1045 state as a function of time are used to estimate the heat capacity.
1046 temps -- the temperatures of the simulations, if estimating
1050 returns a tuple: (bool, params). bool is True if params have
1051 changed, and params is the new set.
1056 prdb(
"performing ANOVA")
1057 anova_result = are_equal(indicators, targetAR, alpha, method=testMethod,
1058 varMethod=varMethod, power=power)
1059 if (anova_result[
'result']
1060 and are_equal_to_targetAR(indicators, targetAR, alpha,
1061 method=meanMethod)):
1062 prdb(
"all means are equal to target AR, nothing to do!")
1063 min_n = minimum_n(anova_result, alpha)
1065 'Try to rerun this test with at least %d more samples.' %
1067 return (
False, min_n)
1068 prdb(
"some means are different, performing t-tests")
1071 isGood = find_good_ARs(indicators, targetAR, alpha, method=meanMethod)
1072 if not (
False in isGood):
1073 prdb(
"""Bad luck: ANOVA says means are not identical, but t-tests
1074 can't find the bad rates...""")
1075 return (
False, params)
1079 prdb(
"performing stationarity test")
1080 if not are_stationnary(indicators, alpha):
1081 prdb(
"Warning: Some simulations are not stationary!")
1085 prdb(
"launching Cv estimation or skipping it")
1086 if CvMethod ==
"skip":
1088 elif CvMethod ==
"interpolate" or CvMethod ==
"constant":
1089 Cv =
CvEstimator(params, indicators=indicators, method=CvMethod)
1090 elif CvMethod ==
"mbar":
1097 raise NotImplementedError(CvMethod)
1100 prdb(
'updating params')
1102 params = update_params(indicators, params, isGood, targetAR=targetAR,
1103 immobilePoint=immobilePoint, Cv=Cv,
1104 badMethod=badMethod, goodMethod=goodMethod,
1105 dumb_scale=dumb_scale)
1108 return (
True, params)
1111 if __name__ ==
'__main__':
1114 for i
in range(1, 8):
1116 tuple(numpy.fromfile(
'data/replica-indices/%d.rep' % i,
1117 dtype=int, sep=
'\n')))
1119 prdb(
"replicanums: %dx%d" % (len(replicanums), len(replicanums[0])))
1120 params = tuple(numpy.fromfile(
'data/temperatures', sep=
' '))
1123 indicators = compute_indicators(replicanums, subs=1, start=0)
1124 prdb(
"indicators: %dx%d" % (len(indicators), len(indicators[0])))
1125 prdb(
"Exchange rate:")
1126 prdb(numpy.array([sum(ind) / float(len(ind))
for ind
in indicators]))
1127 numpy.array([sum(ind) / float(len(ind))
1128 for ind
in indicators]).tofile(
'xchgs', sep=
'\n')
1129 changed, newparams = tune_params_ar(
1130 indicators, params, targetAR=0.25,
1131 badMethod=
"dumb", goodMethod=
"dumb", CvMethod=
"skip",
1132 testMethod=
"anova", alpha=0.05)
1134 print(
"Parameter set seems optimal.")
1136 if True not in [abs(newparams[i + 1] - newparams[i]) < 1e-3
1137 for i
in range(len(newparams) - 1)]:
1138 numpy.array(newparams).tofile(
'data/temperatures', sep=
' ')
1140 print(
"PROBLEM IN NEW PARAMETERSET -> not saved")
1141 print(
"params :", params)
1142 print(
"new params:", newparams)
def estimate_cv_constant
try to guess which constant cv fits best
def estimate_cv_interpolate
interpolate using previous values, by reversing the approximate overlap function
When created, estimates the heat capacity from the energies or from the indicator functions using the...
def mean_interp
estimate the mean of Cv between two points.
def estimate_cv_mbar
use MBAR to get the heat capacity
def get_mbar
returns the point estimate of the first derivative of the energy with respect to the replica exchange...
def get_interp
returns the point estimate of the first derivative of the energy with respect to the replica exchange...