4 This module provides a few methods to improve the efficiency of a replica-exchange simulation
5 by tuning its parameters.
14 from numpy.random
import randint
15 import rpy2.robjects
as robjects
17 kB = 1.3806503 * 6.0221415 / 4184.0
28 robjects.globalenv[
"kB"] = kB
29 _rinverf = r(
'invErf <- function(x) {qnorm((1 + x) /2) / sqrt(2)}')
30 _rerf = r(
'erf <- function(x) {2 * pnorm(x * sqrt(2)) - 1}')
41 return _rinvF(x,d1,d2)[0]
43 def spline(xy, mean,method=None):
44 """spline interpolation of (x,y) coordinates. If interpolation goes
45 negative, replace by mean value.
48 robjects.globalenv[
"x"] = robjects.FloatVector(x)
49 robjects.globalenv[
"y"] = robjects.FloatVector(y)
53 r(
'cvsplinenonbounded <- splinefun(x,y)')
55 r(
'cvsplinenonbounded <- splinefun(x,y,method="%s")' % method)
56 _rinterp = r(
'cvspline <- function(x) { tmp = cvsplinenonbounded(x); if (tmp>0) {tmp} else {%f}}' % mean)
62 def linear_interpolation(xy, mean):
63 """linear interpolation of (x,y) coordinates. No extrapolation possible.
66 robjects.globalenv[
"x"] = robjects.FloatVector(x)
67 robjects.globalenv[
"y"] = robjects.FloatVector(y)
70 _rinterp = r(
'cvspline <- approxfun(x,y)')
80 """perform anova using R and return statistic, p-value, between and within variance"""
84 reps = r.rep(0,len(args[0]))
85 weight = robjects.FloatVector(args[0])
86 for i
in xrange(1,len(args)):
87 reps += r.rep(i,len(args[i]))
88 weight += robjects.FloatVector(args[i])
89 group = r.factor(reps)
90 robjects.globalenv[
"weight"] = weight
91 robjects.globalenv[
"group"] = group
92 lm = r.lm(
"weight ~ group")
96 anova_result = {
'fstat':aov[3][0],
100 'nsteps':[len(i)
for i
in args],
106 """perform kruskal-wallis rank test"""
110 reps = r.rep(0,len(args[0]))
111 weight = robjects.FloatVector(args[0])
112 for i
in xrange(1,len(args)):
113 reps += r.rep(i,len(args[i]))
114 weight += robjects.FloatVector(args[i])
115 group = r.factor(reps)
116 aov = r(
'kruskal.test')(group,weight)
118 kruskal_result = {
'fstat':aov[0][0],
120 'nsteps':[len(i)
for i
in args],
123 return kruskal_result
125 def ttest(obs, target):
126 """perform a one-sample two-sided t-test on obs against target mean"""
127 test = r(
't.test')(robjects.IntVector(obs),mu=target)
128 return test[0][0],test[2][0]
130 def binom(obs, target):
131 """perform an exact binomial test on the mean of obs against target"""
134 test = r(
'binom.test')(success,trials,p=target)
135 return test[0][0],test[2][0]
138 """perform bartlett's test on the equality of variances of the observations"""
141 group = r.gl(ngroups,nreps)
142 weight = robjects.IntVector(args[0])
144 weight += robjects.IntVector(i)
145 robjects.globalenv[
"weight"] = weight
146 robjects.globalenv[
"group"] = group
147 var = r(
'bartlett.test')(weight,group)
148 return var[0][0], var[2][0]
151 """perform Fligner-Killeen non-parametric test of the variance equality"""
154 group = r.gl(ngroups,nreps)
155 weight = robjects.IntVector(args[0])
157 weight += robjects.IntVector(i)
158 robjects.globalenv[
"weight"] = weight
159 robjects.globalenv[
"group"] = group
160 var = r(
'fligner.test')(weight,group)
161 return var[0][0], var[2][0]
163 def power_test(ar, power=0.8, alpha=0.05):
164 """perform an anova power test and return
165 - the power of the test with this input data
166 - the number of points that would be needed to achieve a default power of 0.8
167 ar: the ouput of anova()
169 result = r(
'power.anova.test')(groups=ar[
'nreps'], n=min(ar[
'nsteps']),
170 between=ar[
'between'], within=ar[
'within'], sig=alpha)
171 prdb(
'the power of this anova was: %.3f' % result[5][0])
172 result = r(
'power.anova.test')(groups=ar[
'nreps'],
173 between=ar[
'between'], within=ar[
'within'], sig=alpha, pow=power)
174 prdb(
'To have a power of %.3f, there should be at least %d exchange attemps.' % (power, result[1][0]) )
177 def minimum_n(ar, alpha=0.05):
178 """This routine tries to return an estimate of the additional number of exchange trials that
179 could lead to a positive result of the anova (e.g. the average ARs are not the same). It is
180 still very crude. It also assumes that a one-way anova was made.
181 ar: the output of anova()
185 nsteps = ar[
'nsteps']
191 return nsteps*(sqrt(Finv(1-alpha,nreps-1,nreps*(nsteps-1))/fstat)-1)
196 """When created, estimates the heat capacity from the energies or from the
197 indicator functions using the specified method. Two methods are then
198 available to guess the heat capacity at a given parameter value: get()
199 returns a single point, and mean() returns the linear average between two
203 def __init__(self, params, energies=None, indicators=None,
204 method=
"constant", temps=
None, write_cv =
False):
206 kB = 1.3806503 * 6.0221415 / 4184.0
207 self.__initialized =
False
211 if method ==
"interpolate":
215 elif method ==
"constant":
217 self.get =
lambda p: self.__cv
218 self.mean =
lambda p1,p2: self.__cv
219 self.__cvfun = self.get
220 r(
'cvspline <- function(x) {%f}' % self.__cv)
221 elif method ==
"mbar":
224 self.mean = self.mean_mbar
226 raise NotImplementedError, method
228 self.__initialized =
True
233 fl.write(
"".join([
"%f %f\n" % (x,self.get(x))
for x
in linspace(params[0]/2,2*params[-1])]))
237 """interpolate using previous values, by reversing the approximate overlap
240 if self.__initialized :
242 if len(indicators) != len(params) - 1:
244 "the length of indicators and params does not match!"
245 if params != tuple(sorted(params)):
246 raise NotImplementedError, \
247 "unable to work on parameters that do not change monotonically"
249 prdb(
"storing params and means")
250 self.__params = params
251 self.__pmeans = [(params[i] + params[i+1])/2. \
252 for i
in xrange(len(params)-1)]
253 prdb(
"computing __cv")
254 for i,ind
in enumerate(indicators):
255 mean = sum(ind)/float(len(ind))
256 Y2 = 2*kB*erfinv(1-mean)**2
259 self.__cv.append((self.__pmeans[i], (p1**2+p2**2)*float(Y2)/(p2-p1)**2))
262 self.__cvmean = sum([ i[1]
for i
in self.__cv])/float(len(self.__cv))
263 if self.__cvmean < 0:
264 raise ValueError,
"Cv mean is negative!"
265 self.__cvfun = spline(self.__cv, self.__cvmean)
269 """try to guess which constant cv fits best"""
270 if self.__initialized :
273 self.__cv = self.__cvmean
276 def needs_init(self):
277 if not self.__initialized:
278 raise RuntimeError,
"Class was not initialized correctly!"
281 "use MBAR to get the heat capacity"
282 raise NotImplementedError, method
283 if self.__initialized :
286 def _isinbounds(self,p,params):
287 """returns True if p is within parms, else false. the params list must be sorted ascendingly."""
288 if p < params[0] - EPSILON
or p > params[-1] + EPSILON:
294 def _interpolate(self,xval,xlist):
295 """return interpolation of Cv at point xval, and return the average instead
296 if this value is negative.
298 self._isinbounds(xval,xlist)
299 val = self.__cvfun(xval)
306 """returns the point estimate of the first derivative of the energy with
307 respect to the replica exchange parameter (usually T or q).
308 This version assumes that the means of cv are given.
311 return self._interpolate(param, self.__pmeans)
314 """returns the point estimate of the first derivative of the energy with
315 respect to the replica exchange parameter (usually T or q).
316 This version assumes that the values of cv are given.
319 return self._interpolate(param, self.__params)
322 """estimate the mean of Cv between two points. Here the means were
326 return self._interpolate((pa+pb)/2., self.__pmeans)
328 def mean_mbar(self,pa,pb):
333 def update_good_dumb(newp, oldp, *args, **kwargs):
334 """Here the old parameters are oldp[0] and oldp[1], and the starting point
335 is newp[0]. We should modify newp[1] so that the AR in the following cycle
336 is equal to the targetAR.
337 In the "dumb" method, the Cv and targetAR keywords are ignored.
338 Here the newp[1] parameter is modified because prior changes have set
339 newp[0] to a different value than oldp[0]. Thus, we should move newp[1] by
340 minimizing the effect on the AR since it is supposedly equal to targetAR. In
341 this simple method, the parameter is just translated.
343 prdb(
"newp[0] has moved (%.3f -> %.3f), adjusting the position of newp[1]" %
345 return oldp[1] - (oldp[0] - newp[0])
347 def update_bad_dumb(newp, oldp, ind, targetAR = 0.4, scale=0.1, **kwargs):
348 """Here the old parameters are oldp[0] and oldp[1], and the starting point
349 is newp[0]. We should modify newp[1] so that the AR in the following cycle
350 is equal to the targetAR.
351 In the "dumb" method, the Cv keyword is ignored. Here the newp[1]
352 parameter is modified to follow the possible translation of newp[0] by
353 calling update_good_dumb, and then newp[1] is added or substracted scale% of
354 (oldp[1] - oldp[0]) to adjust to targetAR.
357 if newp[0] != oldp[0]:
358 newp[1] = update_good_dumb(newp,oldp)
359 if targetAR > sum(ind)/float(len(ind)):
360 prdb(
"""target AR is higher than expected, decreasing newp[1]""")
361 newp[1] -= scale * (oldp[1]-oldp[0])
363 prdb(
"""target AR is lower than expected, increasing newp[1]""")
364 newp[1] += scale * (oldp[1]-oldp[0])
367 def update_any_cv_step(newp, oldp, ind, targetAR = 0.4, Cv = None, **kwargs):
368 """here we use the average AR formula of two gaussians to get newp[1] as a
369 function of newp[1], knowing the targetAR and estimating the Cv. If targetAR
370 is negative, consider that mean(ind) equals the target AR and skip any
371 calculation in the case that oldp[0] equals newp[0].
372 step: suppose the heat capacity is stepwise constant, i.e. use the heat
373 capacity at position newp[0] as an estimate of the mean of the heat capacity
374 between newp[0] and newp[1]. This does not require any self-consistent loop.
379 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
382 targetAR = sum(ind)/float(len(ind))
384 Y = sqrt(2*kB)*float(erfinv(1-targetAR))
386 raise ValueError,
"""targetAR too small for this approximate method, use
387 the full self-consistent method instead."""
388 return newp[0]*(cv+Y*sqrt(2*cv - Y**2))/(cv-Y**2)
390 def update_any_cv_sc(newp, oldp, ind, targetAR=0.4, Cv=None,
391 tol=1e-6, maxiter=10000):
392 """self-consistent solver version"""
396 if abs(oldp[0] - newp[0]) < EPSILON
and targetAR < 0:
399 targetAR = sum(ind)/float(len(ind))
401 Y = sqrt(2*kB)*float(erfinv(1-targetAR))
403 raise ValueError,
"""targetAR too small for this approximate method, use
404 the full self-consistent method instead."""
405 targetp = newp[0]*(cv+Y*sqrt(2*cv - Y**2))/(cv-Y**2)
406 for i
in xrange(maxiter):
407 cv = Cv.mean(newp[0],targetp)
408 (oldtargetp, targetp) = (targetp, newp[0]*(cv+Y*sqrt(2*cv-Y**2))/(cv-Y**2))
409 if abs(targetp - oldtargetp) <= tol:
413 raise ValueError,
"""targetAR too small for this approximate method, use
414 the full self-consistent method instead."""
416 raise ValueError,
"""something unexpected happened"""
418 prdb(
"""Warning: unable to converge the self-consistent after %d
419 iterations and a tolerance of %f. Change the method or decrease the
420 tolerance!""" % (maxiter, tol))
421 prdb(
"converged after %d iterations and a tolerance of %f for x=%f" %
425 def update_any_cv_scfull(newp, oldp, ind, targetAR=0.4, Cv=None,
426 tol=1e-6, maxiter=10000):
427 """self-consistent solver version, on the exact average AR equation"""
431 _ru21 = r(
'u21 <- function(t1,t2) { integrate(Vectorize(cvspline),t1,t2)$value }')
432 _rb21 = r(
'b21 <- function(t1,t2) { 1./(kB*t2) - 1./(kB*t1) }')
433 _rsigma2 = r(
'sigma2 <- function(t) {cvspline(t)*kB*t**2}')
434 _rovboltz = r(
'ovboltz <- function(t1,t2) {\
436 u21(t1,t2)/sqrt(2*(sigma2(t1)+sigma2(t2))))\
437 + exp(b21(t1,t2)*(u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2))/2))\
438 * (1+erf((u21(t1,t2)+b21(t1,t2)*(sigma2(t1)+sigma2(t2)))/(sqrt(2*(sigma2(t1)+sigma2(t2))))))\
440 _rrootfn = r(
'rootfn <- function(t2) {ovboltz(%f,t2)-%f}' % (newp[0],targetAR))
443 if oldp[1] > oldp[0]:
448 while _rrootfn(tmp)[0] >= 0:
450 tmp += (oldp[1] - oldp[0])
452 raise RuntimeError,
"heat capacity goes negative"
454 raise RuntimeError,
'could not find zero of function!'
457 _runiroot = r(
'uniroot(rootfn,c(%f,%f),f.lower = %f, f.upper = %f, tol = %f, maxiter = %d)' % (newp[0],tmp,
458 1-targetAR, -targetAR, tol, maxiter))
459 prdb(
"self-consistent solver converged after %s iterations and an estimated precision of %s " % (_runiroot[2][0],_runiroot[3][0]))
460 prdb([
"root:",_runiroot[0][0],
"overlap:", _rovboltz(newp[0],_runiroot[0][0])[0]])
461 return _runiroot[0][0]
463 def update_any_cv_nr(newp, oldp, ind, targetAR = 0.4, Cv = None, **kwargs):
464 """newton-raphson solver version"""
467 raise NotImplementedError
470 def are_equal_to_targetAR(indicators,targetAR=0.4,alpha=0.05, method = "binom"):
471 """here, all indicators have same average, we want to know if it is equal to
476 deviations=[(abs(sum(ind)/float(len(ind))-targetAR),ind)
for pos,ind
in enumerate(indicators)]
478 deviant = deviations[-1]
481 if method ==
"ttest":
484 elif method ==
"binom":
487 raise NotImplementedError
490 test,pval = ttest(deviant[1],targetAR)
492 if abs(targetAR - sum(deviant[1])/len(deviant[1])) > EPSILON:
501 def are_stationnary(indicators, alpha=0.05, method = "anova"):
502 """test on the stationarity of the observations (block analysis). Done so by
503 launching an anova on the difference between the two halves of each observations.
506 if method ==
"kruskal":
511 tmp = array(indicators)
512 blocklen = len(indicators[0])/2
513 block = tmp[:,:blocklen] - tmp[:,blocklen:2*blocklen]
514 if test(*block)[
'pval'] < alpha:
519 def are_equal(indicators, targetAR=0.4, alpha = 0.05,
520 method =
"anova", varMethod =
"skip", power=0.8):
521 """Perform a one-way ANOVA or kruskal-wallis test on the indicators set,
522 and return True if one cannot exclude with risk alpha that the indicators
523 AR are different (i.e. True = all means are equal). Also performs a test
524 on the variance (they must be equal).
527 if min(targetAR,1-targetAR)*len(indicators[0]) <= 5
and \
528 (varMethod ==
"bartlett" or method ==
"anova"):
529 prdb(
"Warning: normal approximation to the binomial does not hold!")
532 if varMethod ==
"skip":
535 if varMethod ==
"bartlett":
536 pval = bartlett(*indicators)[1]
537 elif varMethod ==
"fligner":
538 pval = killeen(*indicators)[1]
540 raise NotImplementedError,
"variance testing method unknown: %s" % varMethod
542 prdb(
"Warning: performing mean test with unequal variances.")
545 if method ==
"kruskal":
550 tr = test(*indicators)
553 tr[
'result']= tr[
'pval'] >= alpha
564 def find_good_ARs(indicators, targetAR=0.4, alpha=0.05, method="binom"):
565 """perform one-sample t-tests on each of the data sets, and return
566 a tuple of bool of size N-1, False if AR i is not equal to targetAR
571 means=[(sum(ind)/float(len(ind)),pos,ind)
for pos,ind
in enumerate(indicators)]
575 if method ==
"ttest":
578 elif method ==
"binom":
581 raise NotImplementedError
585 prdb(
"starting left")
586 for (i,(mean,pos,ind))
in enumerate(means):
587 prdb(
"performing t-test on couple %d having average AR %f, position %d" % (pos,mean,i))
589 test,pval = ttest(ind,targetAR)
591 if abs(targetAR - mean) > EPSILON:
597 isGoodTuple.append((pos,
False))
602 prdb(
"starting right")
603 for (i,(mean,pos,ind))
in enumerate(reversed(means)):
604 prdb(
"performing t-test on couple %d having average AR %f, position %d" \
605 % (pos,mean,len(means)-1-i))
606 if ttest(ind,targetAR)[1] < alpha:
608 isGoodTuple.append((pos,
False))
610 goodstop = len(means)-1-i
614 if len(isGoodTuple) > len(indicators):
615 return tuple([
False]*len(indicators))
617 elif len(isGoodTuple) == 0:
618 return tuple([
True]*len(indicators))
621 isGoodTuple.extend([(means[i][1],
True)
for i
in
622 xrange(goodstart,goodstop+1)])
624 return tuple([tup[1]
for tup
in isGoodTuple])
628 def mean_first_passage_times(replicanums_ori, subs=1, start=0, use_avgAR=False):
629 """compute mean first passage times as suggested in
630 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
632 use_avgAR : if a list of average ARs is given computes everything from
633 average AR; if it is False, compute by direct counting of events.
636 If use_avgAR == False:
637 tau0, tauN, chose_N, times0, timesN
639 tau0, tauN, None, None, None
640 tau0[i]: average time to go from replica 0 to replica i
641 tauN[i]: average time to go from replica N to replica i
642 times0 and timesN are the corresponding lists of single events.
646 from numpy
import array,zeros
647 from pprint
import pprint,pformat
648 replicanums = array(replicanums_ori)[:,start::subs]
656 for state
in xrange(1,N):
657 tau0[state] = tau0[state-1] + state/(float(use_avgAR[state-1]))
658 for state
in reversed(xrange(1,N)):
659 tauN[state-1] = tauN[state] \
660 + (N-(state-1))/(float(use_avgAR[state-1]))
662 return tau0, tauN,
None,
None,
None
666 from itertools
import izip
671 store0 = zeros((N,N), dtype=bool)
672 last0=[0
for i
in xrange(N)]
673 already0 = [
False for i
in xrange(N)]
674 times0=[[]
for i
in xrange(N)]
675 storeN = zeros((N,N), dtype=bool)
676 lastN=[0
for i
in xrange(N)]
677 alreadyN = [
False for i
in xrange(N)]
678 timesN=[[]
for i
in xrange(N)]
681 for time, frame
in enumerate(izip(*replicanums)):
683 if not already0[frame[0]]:
684 last0[frame[0]] = time
685 store0[frame[0],:] =
True
686 already0[frame[0]] =
True
688 if not alreadyN[frame[-1]]:
689 lastN[frame[-1]] = time
690 storeN[frame[-1],:] =
True
691 alreadyN[frame[-1]] =
True
693 already0[frame[1]] =
False
694 alreadyN[frame[-2]] =
False
696 for state,rep
in enumerate(frame):
697 if store0[rep,state]:
699 store0[rep,state] =
False
701 times0[state].append(time - last0[rep])
702 if storeN[rep,state]:
704 storeN[rep,state] =
False
706 timesN[state].append(time - lastN[rep])
708 times = [[]
for i
in xrange(N)]
709 chose_N = [len(timesN[state]) > len(times0[state])
for state
in \
711 for state
in xrange(N):
712 tauN[state] = sum(timesN[state])/float(len(timesN[state]))
713 tau0[state] = sum(times0[state])/float(len(times0[state]))
716 return tau0, tauN, chose_N, times0, timesN
718 def compute_effective_fraction(tau0, tauN, chose_N):
719 """input: tau0, tauN, chose_N
720 output: effective fraction f(T) (P_up(n)) as introduced in
721 Trebst S, Troyer M, Hansmann UHE, J Chem Phys *124* 174903 (2006).
723 Nadler W, Hansmann UHE, Phys Rev E *75* 026109 (2007)
724 and whose calculation is enhanced in
725 Nadler W, Meinke J, Hansmann UHE, Phys Rev E *78* 061905 (2008)
726 the ideal value of f(n) should be 1 - n/N
734 nstar = N - sum([int(a)
for a
in chose_N]) - 1
736 prdb(
"n* = %d" % nstar)
740 for state
in xrange(2,nstar+1):
741 h0[state] = h0[state-1] + (tau0[state]-tau0[state-1])/float(state)
745 for state
in reversed(xrange(nstar,N-1)):
746 hN[state] = hN[state+1] + \
747 (tauN[state]-tauN[state+1])/float(N-state)
752 for n
in xrange(1,nstar+1):
753 pup[n] = 1 - h0[n]/(h0[nstar] + hN[nstar])
754 for n
in xrange(nstar+1, N):
755 pup[n] = hN[n]/(h0[nstar]+hN[nstar])
759 def spline_diffusivity(pup,params):
760 """spline interpolation of diffusivity: D = 1/(df/dT * heta)
762 from numpy
import linspace
763 robjects.globalenv[
"hetay"] = \
764 robjects.FloatVector(linspace(0,1,num=len(params)).tolist())
765 robjects.globalenv[
"hetax"] = robjects.FloatVector(params)
766 robjects.globalenv[
"pupx"] = robjects.FloatVector(params)
767 robjects.globalenv[
"pupy"] = robjects.FloatVector(pup)
768 heta=r(
'heta <- splinefun(hetax,hetay,method="monoH.FC")')
769 eff=r(
'eff <- splinefun(pupx,pupy,method="monoH.FC")')
770 diff = r(
'diff <- function(x) {-1/(heta(x,deriv=1)*eff(x,deriv=1))}')
771 return lambda x: diff(x)[0]
775 def compute_indicators(replicanums, subs = 1, start=0):
776 """input: replicanums : a list of N lists of size M, where N is the number of
777 states and M is the length of the simulation. Each element is an integer,
778 and corresponds to the label of a replica.
779 output: an indicator function of exchanges (size (N-1)x(M-1)), 1 if exchange and
783 if replicanums[n][m] == replicanums[n+1][m+1] \
784 and replicanums[n][m+1] == replicanums[n+1][m]:
790 for n
in xrange(len(replicanums)-1):
791 indicators.append([exchange(n, m) \
792 for m
in xrange(len(replicanums[n])-1)][start::subs])
797 def update_params_nonergodic(pup, params, write_g=False, num=False):
799 from numpy
import linspace
801 g = linear_interpolation(zip(pup,params),0)
803 d = spline_diffusivity(pup,params)
805 fl.write(
"".join([
"%f %f\n" % (x,g(x))
for x
in linspace(0,1,num=100)]))
807 fl=open(
'diffusivity',
'w')
808 fl.write(
''.join([
"%f %f\n" % (x,d(x))
for x
in
809 linspace(params[0],params[-1],num=100)]))
812 fl.write(
"".join([
"%f %f\n" % (i,j)
for (i,j)
in zip(params,pup)]))
817 newparams = [g(i)
for i
in reversed(linspace(0,1,num=len(params)))]
819 newparams = [g(i)
for i
in reversed(linspace(0,1,num=num))]
821 newparams[0] = params[0]
822 newparams[-1] = params[-1]
826 def update_params(indicators, params, isGood, targetAR = 0.4, immobilePoint = 1,
827 Cv =
None, badMethod =
"dumb", goodMethod =
"dumb", dumb_scale = 0.1):
828 """update the parameters according to the isGood tuple and using the
831 newparams = list(params)
833 if immobilePoint != 1:
834 raise NotImplementedError
836 if Cv
is None and (badMethod !=
"dumb" or goodMethod !=
"dumb"):
837 raise RuntimeError,
"""Cv needs to be estimated if using other methods
838 than 'dumb' for updating!"""
840 if goodMethod ==
"dumb":
841 update_good = update_good_dumb
842 elif goodMethod ==
"step":
843 update_good = update_any_cv_step
844 elif goodMethod ==
"sc":
845 update_good = update_any_cv_sc
846 elif goodMethod ==
"scfull":
847 update_good = update_any_cv_scfull
848 elif goodMethod ==
"nr":
849 update_good = update_any_cv_nr
851 raise NotImplementedError, goodMethod
852 if badMethod ==
"dumb":
853 update_bad = update_bad_dumb
854 elif badMethod ==
"step":
855 update_bad = update_any_cv_step
856 elif badMethod ==
"sc":
857 update_bad = update_any_cv_sc
858 elif badMethod ==
"scfull":
859 update_bad = update_any_cv_scfull
860 elif badMethod ==
"nr":
861 update_bad = update_any_cv_nr
863 raise NotImplementedError, badMethod
866 for pos
in xrange(len(params)-1):
868 newparams[pos+1] = update_good(newparams[pos:pos+2],params[pos:pos+2],
869 indicators[pos], targetAR = targetAR, Cv = Cv)
871 newparams[pos+1] = update_bad(newparams[pos:pos+2],params[pos:pos+2],
872 indicators[pos], targetAR = targetAR, Cv = Cv, scale=dumb_scale)
874 return tuple(newparams)
876 def tune_params_flux(replicanums, params, subs=1, start=0, alpha = 0.05,
877 testMethod =
'anova', meanMethod =
'binom', use_avgAR=
False,
878 power = 0.8, num=
False):
883 if use_avgAR
is not False:
884 raise NotImplementedError
886 prdb(
"computing mean first passage times")
887 tau0, tauN, chose_N, times0, timesN = mean_first_passage_times(replicanums,
888 subs=subs, start=start, use_avgAR=use_avgAR)
890 prdb(
"average round trip time: %.2f (%d+%d events)" % \
891 (tau0[-1]+tauN[0],len(times0[-1]),len(timesN[0])))
892 prdb(
"checking if the parameterset needs to be improved")
897 nstar = N - sum([int(a)
for a
in chose_N]) - 1
901 for n
in xrange(1,N-1):
903 reduced.append([i*2.0/((N-n)*(N-n+1))
for i
in timesN[n]])
905 reduced.append([i*2.0/(n*(n+1))
for i
in times0[n]])
907 anova_result = are_equal(reduced, alpha = alpha, method = testMethod,
909 if (anova_result[
'result']):
910 prdb(
"flux is constant, nothing to do!")
911 min_n = minimum_n(anova_result, alpha)
912 prdb(
'Try to rerun this test with at least %d more samples.' % \
914 return (
False, min_n)
918 prdb(
"parameterset not optimal, computing effective fraction")
919 pup = compute_effective_fraction(tau0, tauN, chose_N)
922 prdb(
"returning new parameterset")
923 params = update_params_nonergodic(pup,params,num=num)
925 return (
True, params)
927 def tune_params_ar(indicators, params, targetAR = 0.4, alpha = 0.05, immobilePoint
928 = 1, CvMethod =
"skip", badMethod =
"dumb", goodMethod =
"dumb",
929 varMethod =
"skip", testMethod =
"anova", meanMethod =
"binom",
930 energies =
None, temps =
None, power=0.8, dumb_scale=0.1):
931 """Tune the replica-exchange parameters and return a new set.
934 indicators -- an (N-1)x(M-1) table, where entry at position (j,i)
935 is True if replica j and j+1 performed an exchange between
937 params -- the current set of N parameters used in the simulation.
940 targetAR -- the target AR which is wanted for the simulation
942 alpha -- the type 1 error on the one-way ANOVA and subsequent
943 t-tests (default: 5%)
944 immobilePoint -- which replica should keep it's parameter fixed
945 (default: 1st replica, e.g. 1)
946 CvMethod -- the heat capacity estimation method (default:
947 "skip", other options: "mbar", "spline", "constant")
948 badMethod -- how to correct the (j+1)th parameter if the
949 acceptance ratio between replicas j and j+1 is off the
950 target value (default: "dumb", options: "step", "sc", "scfull", "nr")
951 goodMethod -- how to update the value of the (j+1)th parameter
952 in the case of a correctly exchanging couple, but if the jth
953 parameter has been modified (default: "dumb",options: "step",
954 "sc" self-consistent, "scfull" self-consistent using the exact equation,
955 "nr" newton-raphson solver for the exact equation)
956 dumb_scale -- (0.0-1.0) in the "dumb" method, scale wrong temperature
957 intervals by this amount. (default: 0.1)
958 testMethod -- how to test for the difference of the means,
959 either "anova" for a one-way anova, or "kruskal" for a
960 Kruskal-Wallis one-way non-parametric anova.
961 meanMethod -- "ttest" for a two-sided one-sample t-test,
962 "binom" for an exact binomial test of the probability.
963 varMethod -- how to test for the equality of variances.
964 "fligner" for the Fligner-Killeen non-parametric test,
965 "bartlett" for Bartlett's test, "skip" to pass.
966 energies -- if CvMethod is set to "mbar", the energies of each
967 state as a function of time are used to estimate the heat capacity.
968 temps -- the temperatures of the simulations, if estimating with "mbar".
971 returns a tuple: (bool, params). bool is True if params have
972 changed, and params is the new set.
977 prdb(
"performing ANOVA")
978 anova_result = are_equal(indicators, targetAR, alpha, method = testMethod,
979 varMethod = varMethod, power=power)
980 if (anova_result[
'result']
and
981 are_equal_to_targetAR(indicators,targetAR,alpha, method = meanMethod)):
982 prdb(
"all means are equal to target AR, nothing to do!")
983 min_n = minimum_n(anova_result, alpha)
984 prdb(
'Try to rerun this test with at least %d more samples.' % ceil(min_n))
985 return (
False, min_n)
986 prdb(
"some means are different, performing t-tests")
989 isGood = find_good_ARs(indicators, targetAR, alpha, method = meanMethod)
990 if not (
False in isGood):
991 prdb(
"""Bad luck: ANOVA says means are not identical, but t-tests
992 can't find the bad rates...""")
993 return (
False, params)
997 prdb(
"performing stationarity test")
998 if not are_stationnary(indicators, alpha):
999 prdb(
"Warning: Some simulations are not stationary!")
1003 prdb(
"launching Cv estimation or skipping it")
1004 if CvMethod ==
"skip":
1006 elif CvMethod ==
"interpolate" or CvMethod ==
"constant":
1007 Cv =
CvEstimator(params, indicators=indicators, method=CvMethod)
1008 elif CvMethod ==
"mbar":
1009 Cv =
CvEstimator(params, energies=energies, temps=temps, method = CvMethod)
1011 raise NotImplementedError, CvMethod
1014 prdb(
'updating params')
1016 params = update_params(indicators, params, isGood, targetAR = targetAR,
1017 immobilePoint = immobilePoint, Cv = Cv,
1018 badMethod = badMethod, goodMethod = goodMethod, dumb_scale=dumb_scale)
1021 return (
True, params)
1023 if __name__ ==
'__main__':
1026 for i
in xrange(1,8):
1027 replicanums.append(tuple(fromfile(
'data/replica-indices/%d.rep' % i,
1028 dtype=int, sep=
'\n')))
1030 prdb(
"replicanums: %dx%d" % (len(replicanums), len(replicanums[0])))
1031 params = tuple(fromfile(
'data/temperatures', sep=
' '))
1034 indicators = compute_indicators(replicanums, subs=1,start=0)
1035 prdb(
"indicators: %dx%d" % (len(indicators), len(indicators[0])))
1036 prdb(
"Exchange rate:")
1037 prdb(array([sum(ind)/float(len(ind))
for ind
in indicators]))
1038 array([sum(ind)/float(len(ind))
for ind
in
1039 indicators]).tofile(
'xchgs',sep=
'\n')
1040 changed, newparams = tune_params(indicators, params, targetAR = 0.25,
1041 badMethod=
"dumb",goodMethod=
"dumb",CvMethod=
"skip",testMethod=
"anova",alpha=0.05)
1043 print "Parameter set seems optimal."
1045 if not True in [abs(newparams[i+1]-newparams[i]) < 1e-3
for i
in xrange(len(newparams)-1)]:
1046 array(newparams).tofile(
'data/temperatures', sep=
' ')
1048 print "PROBLEM IN NEW PARAMETERSET -> not saved"
1049 print "params :",params
1050 print "new params:",newparams