## Splus functions useful in numerical optimization.
## Daniel Heitjan, 13 September 1990
## Revised, 10 March 1992
## Style changes, 94.02.26
## Some minor changes in printing outputs, 02.06.10
##
fun.amoeba<-function(p,y,ftol,itmax,funk,data,vrbs=F){
## Multidimensional minimization of the function funk(x,data), where
## x is an ndim-dimensional vector and data is a fixed set of data,
## by the downhill simplex method of Nelder and Mead.  The structure
## data is arbitrary and is evaluated only in funk.  Input is a
## matrix p whose ndim+1 rows are ndim-dimensional vectors that are
## the vertices of the starting simplex, and a data vector in a
## suitable format.  Also input is the vector y of length ndim+1,
## whose components must be pre-initialized to the values of funk
## evaluated at the ndim+1 vertices (rows) of p, and ftol, the
## fractional convergence tolerance to be achieved in the function
## value (n.b.!).  The output list contains components p, ndim+1 new
## points all within ftol of a minimum function value; y, the
## function values at those points; iter, the number of iterations
## taken; and converge, a convergence indicator.
## Translated from *Numerical Recipes*, 13 March 1989
## Revised for model fitting, 16 March 1989
## Revised to handle univariate parameters, 29 July 1989
## Comments and printing revised, 28 April 1990
## Stopping criterion revised, 3 May 1990
## Error in contraction routine fixed, 4 May 1990
## Replaced 'order' with 'sort.list', 4 May 1990
## Modified digits to print, 20 June 1990
## Changed print to cat, 13 September 1990
## Modify printing, 8 February 1991
## Added verbose mode, 94.12.25
## Daniel F. Heitjan
##
  if (vrbs) cat('entering fun.amoeba\n')
## Three parameters which define the expansions and contractions.
  alpha<-1.0
  beta<-0.5
  gamma<-2.0
## A parameter that governs stopping if the function is converging to
## zero.
  eps<-1.e-10
##
  mpts<-nrow(p)
  iter<-0
  contin<-T
  converge<-F
  while (contin) {
## First we must determine which point is the highest (worst), next
## highest, and lowest (best).
    ord<-sort.list(y)
    ilo<-ord[1]
    ihi<-ord[mpts]
    inhi<-ord[mpts-1]
## Compute the fractional range from highest to lowest and return if
## satisfactory.
    rtol<-2.*abs(y[ihi]-y[ilo])/(abs(y[ihi])+abs(y[ilo])+eps)
    if ((rtol<ftol)||(iter==itmax)) {
      contin<-F
      converge<-(rtol<ftol)
      if (rtol>=ftol) cat('Amoeba exceeding maximum iterations.\n')
    } else {
      iter<-iter+1
      if (vrbs) cat('\n iteration',iter,'\n')
##
## Begin a new iteration.  Compute the vector average of all points
## except the highest, i.e. the center of the face of the simplex
## across from the high point.  We will subsequently explore along
## the ray from the high point through that center.
      pbar<-matrix(p[-ihi,],(mpts-1),(mpts-1))
      pbar<-apply(pbar,2,mean)
##
## Extrapolate by a factor alpha through the face, i.e. reflect the
## simplex from the high point.  Evaluate the function at the
## reflected point.
      pr<-(1+alpha)*pbar-alpha*p[ihi,]
      ypr<-funk(pr,data)
      if (ypr<=y[ilo]) {
## Gives a result better than the best point, so try an additional
## extrapolation by a factor gamma, and check out the function there.
        prr<-gamma*pr+(1-gamma)*pbar
        yprr<-funk(prr,data)
        if (yprr<y[ilo]) {
## The additional extrapolation succeeded; replace the high point.
          p[ihi,]<-prr
          y[ihi]<-yprr
        } else {
## The additional extrapolation failed; use the reflected point.
          p[ihi,]<-pr
          y[ihi]<-ypr
        }
      } else if (ypr>=y[inhi]) {
## The reflected point is worse than the second-highest.  If it is
## better than the highest, use it to replace the highest.
        if (ypr<y[ihi]) {
          p[ihi,]<-pr
          y[ihi]<-ypr
        }
## Look for an intermediate lower point by performing a contraction
## of the simplex along one dimension, and evaluate the function
## there.
        prr<-beta*p[ihi,]+(1-beta)*pbar
        yprr<-funk(prr,data)
        if (yprr<y[ihi]) {
## The contraction gives an improvement, so accept it.
          p[ihi,]<-prr
          y[ihi]<-yprr
        } else {
## Can't seem to get rid of the high point; contract around the
## lowest (best) point.
          for (i in 1:mpts) {
            if (i!=ilo) {
              pr<-0.5*(p[i,]+p[ilo,])
              p[i,]<-pr
              y[i]<-funk(pr,data)
            }
          }
        }
      } else {
## The reflected point is intermediate in quality, so accept it.
        p[ihi,]<-pr
        y[ihi]<-ypr
      }
    }
  }
  if (vrbs) cat(' leaving fun.amoeba\n')
  list(p=p,y=y,iter=iter,converge=converge)
}
##
##
fun.nmcovar<-function(funk,datalist,p,y,p0) {
## Estimate the covariance matrix of a minimum found by the downhill
## simplex method, using the quadratic-surface fit of Nelder and Mead
## (1965).  Input is the criterion function funk (assumed to be in
## -2lnL form) with its data in datalist; p, a matrix whose n rows
## are distinct points near the minimum; y, the values of funk at the
## rows of p; and p0, the minimizing value of the parameter.
## Daniel F. Heitjan
# cat('entering fun.nmcovar\n')
  n<-length(p0)
## Work on the -lnL scale, where the inverse curvature estimates the
## covariance.
  y0<-0.5*funk(p0,datalist)
  y00<-0*c(1:n)
  b<-matrix(0,n,n)
  for (i in 1:n) {
## Evaluate at the midpoint of the minimum and the ith point, then
## fill in the curvature matrix b.
    y00[i]<-0.5*funk(0.5*(p[i,]+p0),datalist)
    b[i,i]<-2*(0.5*y[i]+y0-2*y00[i])
    if (i>1) {
      for (j in 1:(i-1)) {
        pij<-0.5*(p[i,]+p[j,])
        yij<-0.5*funk(pij,datalist)
        b[i,j]<-2*(yij+y0-y00[i]-y00[j])
      }
    }
  }
  if (n==1) {
    b<-b/2
  } else {
    b<-b-0.5*diag(diag(b))
  }
  b<-b+t(b)
## now compute qmat.
  qmat<-t(p)-p0
  sqmat<-solve(qmat)
  varmat<-solve(t(sqmat)%*%b%*%sqmat)
# cat(' leaving fun.nmcovar\n')
  varmat
}
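##
## The following is an illustrative sketch only: fun.egamoeba, quad
## and its weighted quadratic are hypothetical stand-ins for a real
## -2lnL criterion.  It shows the calling convention of fun.amoeba:
## build the starting simplex p, pre-evaluate y at its rows, then
## call with a tolerance and an iteration cap.
fun.egamoeba<-function() {
## Minimize quad(x,data) from a unit simplex; the returned p and y
## should be near data$m and 0, with converge=T.
  quad<-function(x,data) sum(data$w*(x-data$m)^2)
  dl<-list(w=c(1,4),m=c(2,-1))
  p<-rbind(c(0,0),c(1,0),c(0,1))
  y<-0*c(1:3)
  for (i in 1:3) {
    y[i]<-quad(p[i,],dl)
  }
  fun.amoeba(p,y,1.e-8,500,quad,dl)
}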
##
##
fun.infocomp<-function(fun.fcn,dl,theta,nlts,thtvr,dlt0=1) {
## Estimate the information matrix of a set of parameters that
## minimize the criterion function fun.fcn, using straight finite
## differences.  This function produces a list of estimated
## information matrices, which should then be inspected and--if
## stable and nonsingular--inverted to estimate the covariance.  A
## second list contains their eigenvalues.
## Daniel F. Heitjan, 7 April 1990
## Revised 15 April 1991
# cat('entering fun.infocomp\n')
  covlist<-vector('list',nlts)
  eigenlist<-covlist
  for (i in 1:nlts) {
    cat('\n i',i,'\n')
    covlist[[i]]<-fun.fdinfo(theta,fun.fcn,dl,dlt0*(0.2)**(i+2),thtvr)
    eigenlist[[i]]<-eigen(covlist[[i]])$values
    cat('eigenvalues of the information\n')
    print(eigenlist[[i]])
  }
# cat(' leaving fun.infocomp\n')
  covlist
}
##
##
fun.dsinfocomp<-function(fun.fcn,dl,theta,ndigs) {
## Estimate the information matrix of a set of parameters that
## minimize the criterion function fun.fcn, using straight finite
## differences.  This function produces a list of estimated
## information matrices, which should then be inspected and--if
## stable and nonsingular--inverted to estimate the covariance.  A
## second list contains their eigenvalues.
## Daniel F. Heitjan, 7 April 1990
## Revised 15 April 1991
# cat('entering fun.dsinfocomp\n')
  covlist<-vector('list',length(ndigs))
  eigenlist<-covlist
  for (i in 1:length(ndigs)) {
    cat('\n digits assumed precision',ndigs[i],'\n')
    covlist[[i]]<-(-1)*fun.dsinfo(theta,fun.fcn,dl,digits=ndigs[i])$d2
    eigenlist[[i]]<-eigen(covlist[[i]])$values
    cat('eigenvalues of the information\n')
    print(eigenlist[[i]])
  }
# cat(' leaving fun.dsinfocomp\n')
  covlist
}
##
##
fun.findmin<-function(optstruc) {
## Find the value of theta having the minimum value of the criterion
## function in the supplied optstruc list, to be used in a subsequent
## run of fun.runamoeba.
## Daniel F. Heitjan, 7 April 1990
## Revised, 21 February 1991
# cat('entering fun.findmin\n')
  imin<-sort.list(optstruc$y)[1]
  theta<-optstruc$p[imin,]
  minoff<-optstruc$y[imin]
# cat(' leaving fun.findmin\n')
  list(theta=theta,y=minoff)
}
##
##
fun.rstrt<-function(optstruc,fun.fcn,dl,fact,itmax,crit,vrbs=F) {
## Restart fun.amoeba, using as parameter array a set of points
## including the parameter giving the minimum so far.
## Daniel F. Heitjan, 23 February 1991
## Revised, 27 November 1991
  theta<-fun.findmin(optstruc)$theta
  fun.runamoeba(theta,fun.fcn,dl,fact,itmax,crit,vrbs)
}
##
##
fun.runamoeba<-function(theta,fun.fcn,dl,fact,itmax,crit,vrbs=F) {
## A function for running the minimization function in fun.amoeba.
## The function to be minimized is in fun.fcn, the argument to
## minimize over has starting values in theta, dl is a list of data
## to be supplied, fact is a factor to use to construct the original
## array of theta points for fun.amoeba, and itmax is the maximum
## number of iterations scheduled.
## Daniel F. Heitjan, 7 April 1990
## Revised, 7 April 1990
  theta[(fact!=0)&(theta==0)]<-0.01
  np<-length(theta)
  p<-matrix(theta,ncol=np,nrow=np+1,byrow=T)
  ystar<-p[,1]
  for (i in 1:np) {
    p[i+1,i]<-p[i+1,i]*(1+fact[i])
  }
  for (i in 1:(np+1)) {
    ystar[i]<-fun.fcn(p[i,],dl)
  }
  if (vrbs) {
    cat('\n\nobjective function at the starting simplex\n')
    print(ystar)
  }
  fun.amoeba(p,ystar,crit,itmax,fun.fcn,dl,vrbs)
}
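##
## An illustrative driver (fun.egrun, egfcn and its data list are
## hypothetical names): the usual sequence is fun.runamoeba from a
## starting value, a fun.rstrt from the best point found, and
## fun.findmin to extract the estimate.  Least squares for a line
## through four points stands in for a real -2lnL criterion.
fun.egrun<-function() {
  egfcn<-function(theta,dl) sum((dl$y-theta[1]-theta[2]*dl$x)^2)
  dl<-list(x=c(0,1,2,3),y=c(1,3,5,7))
  opt<-fun.runamoeba(c(0,0),egfcn,dl,c(0.1,0.1),500,1.e-8)
  opt<-fun.rstrt(opt,egfcn,dl,c(0.05,0.05),500,1.e-8)
## The fitted theta should be near (1,2) with criterion near 0.
  fun.findmin(opt)
}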
##
##
fun.fdinfo<-function(theta,fun.fcn,dl,pdelta,thtvr) {
## Compute an estimate of the observed information matrix by finite
## differences, using the likelihood function (-2ln form) in fun.fcn,
## data in dl.  One varies the parameters indicated by 1s in thtvr
## and holds the others constant at their values specified in theta.
## Daniel Heitjan, 9 April 1990
## Revised, 14 September 1990
## Revised, 10 March 1992
## Revised for compatibility with 3.1.1, 10 October 1992
# cat('entering fun.fdinfo\n')
  np<-length(thtvr[thtvr==1])
# cat('np:',np,'\n')
  indx<-row(matrix(thtvr,ncol=1))[thtvr==1]
  delta<-pdelta*(0.001+abs(theta))
  delta<-(theta+delta)-theta
  delta<-delta[thtvr==1]
  grad<-0*c(1:np)
  hess<-0*matrix(0,np,np)
## Gradient estimate and diagonal of the Hessian.
  for (j in 1:np) {
    thetap<-theta
    thetap[indx[j]]<-thetap[indx[j]]+delta[j]
    fp<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    f0<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    thetap[indx[j]]<-thetap[indx[j]]-delta[j]
    fm<-(-0.5)*fun.fcn(thetap,dl)
    grad[j]<-(fp-fm)/(2*delta[j])
    hess[j,j]<-(fp-2*f0+fm)/(delta[j])**2
  }
  cat('estimated gradient\n')
  print(grad)
  if (np>1) {
## Off-diagonal elements of the Hessian.
    for (j in 2:np) {
      for (k in 1:(j-1)) {
        thetap<-theta
        thetap[indx[j]]<-thetap[indx[j]]+delta[j]
        thetap[indx[k]]<-thetap[indx[k]]+delta[k]
        fpp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[indx[j]]<-thetap[indx[j]]+delta[j]
        thetap[indx[k]]<-thetap[indx[k]]-delta[k]
        fpm<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[indx[j]]<-thetap[indx[j]]-delta[j]
        thetap[indx[k]]<-thetap[indx[k]]+delta[k]
        fmp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[indx[j]]<-thetap[indx[j]]-delta[j]
        thetap[indx[k]]<-thetap[indx[k]]-delta[k]
        fmm<-(-0.5)*fun.fcn(thetap,dl)
        hess[j,k]<-(fpp-fpm-fmp+fmm)/(4*delta[j]*delta[k])
      }
    }
  }
  hess<-(t(hess)+hess)
  for (j in 1:np) {
    hess[j,j]<-hess[j,j]/2
  }
# cat(' leaving fun.fdinfo\n')
  -1*hess
}
##
##
fun.powell<-function(p,fun.fcn,dl,itmax,ftol) {
## Minimization of a function fun.fcn of n variables.  Input consists
## of a starting point p that is a vector of length n; data in dl;
## ftol is the fractional tolerance in the function value such that
## failure to decrease by more than this amount on one iteration
## signals doneness; itmax is the largest number of iterations to be
## allowed; fun.fcn is the function to be minimized.  The output list
## consists of components p, the best point found; xi, the current
## direction set; and fret, the returned function value at p.
## Translated from *Numerical Recipes*, May 1 1990
## Modified to show direction-set changes, May 5 1990
## Daniel F. Heitjan
## cat('entering fun.powell\n')
  n<-length(p)
## A parameter that governs stopping if the function is converging
## to zero.
  eps<-1.e-10
## The initial set of directions.
  xi<-1
  if (n>1) xi<-diag(n)
  fret<-fun.fcn(p,dl)
## Save the initial point.
  pt<-p
  iter<-0
  contin<-T
  while (contin) {
    iter<-iter+1
    cat('\n iteration',iter,'\n')
    fp<-fret
    delvec<-0*p
    for (i in 1:n) {
## Minimize along each direction in turn, recording the decrease.
      xit<-xi
      if (n>1) xit<-xi[,i]
      fptt<-fret
      lmstruc<-fun.linmin(p,xit,fun.fcn,dl)
      p<-lmstruc$p; xit<-lmstruc$xi; fret<-lmstruc$fret
      delvec[i]<-fptt-fret
    }
    ibig<-(sort.list(delvec))[n]
    del<-delvec[ibig]
    if (2*abs(fp-fret)<=ftol*(abs(fp)+abs(fret)+eps)) contin<-F
    if (contin) {
      if (iter==itmax) {
        cat('fun.powell exceeding maximum iterations.\n')
        contin<-F
      }
## Construct the extrapolated point and the average direction.
      ptt<-2*p-pt
      xit<-(p-pt)
      pt<-p
      fptt<-fun.fcn(ptt,dl)
## Decide whether to use a new direction.
      if (fptt<fp) {
        t<-2*(fp-2*fret+fptt)*(fp-fret-del)**2-del*(fp-fptt)**2
        if (t<0) {
## Move to the minimum of the new direction, and save the new
## direction in place of the one that produced the largest decrease.
          lmstruc<-fun.linmin(p,xit,fun.fcn,dl)
          p<-lmstruc$p; xit<-lmstruc$xi; fret<-lmstruc$fret
          if (n>1) {
            xi[,ibig]<-xi[,n]
            xi[,n]<-xit
          } else {
            xi<-xit
          }
          cat('direction set changed\n')
        }
      }
    }
  }
## cat(' leaving fun.powell\n')
  list(p=p,xi=xi,fret=fret)
}
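##
## A sketch of a direct call to fun.powell; fun.egpowell and egfcn
## are hypothetical names, and the criterion is an arbitrary smooth
## function with its minimum at (3,-1).
fun.egpowell<-function() {
  egfcn<-function(p,dl) (p[1]-dl$a)^2+dl$b*(p[2]+1)^2
  dl<-list(a=3,b=2)
  fun.powell(c(0,0),egfcn,dl,50,1.e-8)
}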
##
##
fun.linmin<-function(p,xi,fun.fcn,dl) {
## Given an n-dimensional point p and an n-dimensional direction xi,
## move and reset p to where fun.fcn(p,dl) takes on a minimum along
## the direction xi from p, and replace xi by the actual vector
## displacement that p was moved.  Return also fret, the value of
## fun.fcn at the returned location p.
## Translated from *Numerical Recipes*
## Daniel F. Heitjan
# cat('entering fun.linmin\n')
  tol<-1.e-4
  itmax<-100
## Bracket a minimum of fun.fcn along the ray p+u*xi, then polish it
## with Brent's method.
  mb<-fun.mnbrak(0,1,fun.fcn,dl,p,xi)
  br<-fun.brent(mb$ax,mb$bx,mb$cx,fun.fcn,dl,p,xi,tol,itmax)
  xi<-br$xmin*xi
  p<-p+xi
# cat(' leaving fun.linmin\n')
  list(p=p,xi=xi,fret=br$brent)
}
##
##
fun.brent<-function(ax,bx,cx,fun.fcn,dl,orgn,drct,tol,itmax) {
## Given a function fun.fcn with data in dl and origin in orgn and
## direction in drct, and given a bracketing triplet of abscissas ax,
## bx and cx (such that bx is between ax and cx and the function
## value at bx is less than at both ax and cx), isolate the minimum
## to a fractional precision of about tol using Brent's method.  The
## output list contains xmin, the abscissa of the minimum, and brent,
## the function value there.
## Translated from *Numerical Recipes*
## Daniel F. Heitjan
# cat('entering fun.brent\n')
  cgold<-0.3819660
  zeps<-1.e-10
  a<-min(ax,cx)
  b<-max(ax,cx)
  v<-bx; w<-v; x<-v
  e<-0
  d<-0
  fx<-fun.fcn(orgn+x*drct,dl)
  fv<-fx
  fw<-fx
  iter<-0
  contin3<-T
  while (contin3) {
    xm<-0.5*(a+b)
    tol1<-tol*abs(x)+zeps
    tol2<-2*tol1
## Test for doneness here.
    if (abs(x-xm)<=(tol2-0.5*(b-a))) contin3<-F
    if (contin3) {
      contin1<-F
      if (abs(e)>tol1) {
## Construct a trial parabolic fit.
        contin1<-T
        r<-(x-w)*(fx-fv)
        q<-(x-v)*(fx-fw)
        p<-(x-v)*q-(x-w)*r
        q<-2*(q-r)
        if (q>0) p<-(-1)*p
        q<-abs(q)
        etemp<-e
        e<-d
        if ((abs(p)>=abs(.5*q*etemp))||(p<=q*(a-x))||
            (p>=q*(b-x))) contin1<-F
        if (contin1) {
## Parabolic fit is OK; use it to compute the trial u.
          d<-p/q
          u<-x+d
          if (((u-a)<tol2)||((b-u)<tol2)) d<-fun.sign(tol1,xm-x)
        }
      }
      if (!contin1) {
## Parabolic fit not acceptable; take a golden section step into the
## larger of the two segments.
        if (x>=xm) {
          e<-a-x
        } else {
          e<-b-x
        }
        d<-cgold*e
      }
## By this point, d has been computed by parabolic or golden
## section.
      if (abs(d)>=tol1) {
        u<-x+d
      } else {
        u<-x+fun.sign(tol1,d)
      }
## This is the one function evaluation per iteration.
      fu<-fun.fcn(orgn+u*drct,dl)
## Now lots of housekeeping.
      if (fu<=fx) {
        if (u>=x) {
          a<-x
        } else {
          b<-x
        }
        v<-w
        fv<-fw
        w<-x
        fw<-fx
        x<-u
        fx<-fu
      } else {
        if (u<x) {
          a<-u
        } else {
          b<-u
        }
        if ((fu<=fw)||(w==x)) {
          v<-w
          fv<-fw
          w<-u
          fw<-fu
        } else if ((fu<=fv)||(v==x)||(v==w)) {
          v<-u
          fv<-fu
        }
      }
      iter<-iter+1
      if (iter>itmax) {
        cat('fun.brent exceeding maximum iterations.\n')
        contin3<-F
      }
    }
  }
# cat(' leaving fun.brent\n')
  list(xmin=x,brent=fx)
}
##
##
fun.mnbrak<-function(ax,bx,fun.fcn,dl,orgn,drct) {
## Given a function fun.fcn with data in dl and origin in orgn and
## direction in drct, and given distinct initial points ax and bx,
## this routine searches in the downhill direction (defined by the
## function as evaluated at the initial points).  The output list
## includes new scalars ax, bx and cx which bracket a minimum of the
## function.  Also returned are the function values at the three
## points: fa, fb and fc.
## Translated from *Numerical Recipes*, May 1 1990
## Daniel F. Heitjan
# cat('entering fun.mnbrak\n')
  gold<-1.618034; glimit<-100; tiny<-1.e-20
  fa<-fun.fcn(orgn+ax*drct,dl)
  fb<-fun.fcn(orgn+bx*drct,dl)
  if (fb>fa) {
## ... switch roles, so a is the point with the larger f value.
    dum<-ax; ax<-bx; bx<-dum
    dum<-fb; fb<-fa; fa<-dum
  }
  cx<-bx+gold*(bx-ax)
  fc<-fun.fcn(orgn+cx*drct,dl)
  contin<-T
  while (contin) {
    if (fb>=fc) {
## Compute a quadratic approximation to the minimum.
      r<-(bx-ax)*(fb-fc)
      q<-(bx-cx)*(fb-fa)
      u<-bx-((bx-cx)*q-(bx-ax)*r)/(2*fun.sign(max(abs(q-r),tiny),q-r))
## Compute the largest u to try.
      ulim<-bx+glimit*(cx-bx)
      if ((bx-u)*(u-cx)>0) {
## Parabolic u is between b and c.  Try it.
        fu<-fun.fcn(orgn+u*drct,dl)
        if (fu<fc) {
## A minimum is between b and c.
          ax<-bx
          fa<-fb
          bx<-u
          fb<-fu
          contin<-F
        } else if (fu>fb) {
## A minimum is between a and u.
          cx<-u
          fc<-fu
          contin<-F
        }
        if (contin) {
## Parabolic fit did not fix a minimum; set u to its default.
          u<-cx+gold*(cx-bx)
          fu<-fun.fcn(orgn+u*drct,dl)
        }
      } else if ((cx-u)*(u-ulim)>0) {
## Parabolic u is between c and its allowed limit.
        fu<-fun.fcn(orgn+u*drct,dl)
        if (fu<fc) {
          bx<-cx
          cx<-u
          u<-cx+gold*(cx-bx)
          fb<-fc
          fc<-fu
          fu<-fun.fcn(orgn+u*drct,dl)
        }
      } else if ((u-ulim)*(ulim-cx)>=0) {
## Parabolic u exceeds limit value; reduce it.
        u<-ulim
        fu<-fun.fcn(orgn+u*drct,dl)
      } else {
## Simply reject parabolic u and replace it with default u.
        u<-cx+gold*(cx-bx)
        fu<-fun.fcn(orgn+u*drct,dl)
      }
      if (contin) {
## If a minimum was not located, eliminate the oldest point and
## repeat.
        ax<-bx
        bx<-cx
        cx<-u
        fa<-fb
        fb<-fc
        fc<-fu
      }
    } else {
## Original a, b and c do the trick.
      contin<-F
    }
  }
# cat(' leaving fun.mnbrak\n')
  list(ax=ax,bx=bx,cx=cx,fa=fa,fb=fb,fc=fc)
}
##
##
fun.dssign<-function(a) {
## Dennis-Schnabel vectorized sign function.
  x<-0*a
  x[a>0]<-1
  x[a<0]<-(-1)
  x
}
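##
## The 1-d machinery can also be used on its own.  This sketch
## (fun.egline, egfcn, orgn and drct are hypothetical) brackets a
## minimum along the ray orgn+u*drct with fun.mnbrak, then polishes
## it with fun.brent; the minimizing u should be near 1.5.
fun.egline<-function() {
  egfcn<-function(x,dl) sum((x-dl$m)^2)
  dl<-list(m=c(1,2))
  orgn<-c(0,0)
  drct<-c(1,1)
  mb<-fun.mnbrak(0,1,egfcn,dl,orgn,drct)
  fun.brent(mb$ax,mb$bx,mb$cx,egfcn,dl,orgn,drct,1.e-6,100)
}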
##
##
fun.sign<-function(a1,a2){
## Fortran sign function.
  x<-0
  if (a2>=0) x<-abs(a1) else x<-(-1)*abs(a1)
  x
}
##
##
fun.nrstep<-function(fun.fcn,dl,theta,pdelta) {
## Do a Newton-Raphson step with numerically estimated derivatives.
## Daniel F. Heitjan, 23 February 1991
# cat('entering fun.nrstep\n')
  optlst<-fun.difcomp(theta,fun.fcn,dl,pdelta)
  theta<-theta-0.5*solve(optlst$d2,optlst$d1)
  alglk<-fun.fcn(theta,dl)
  cat('-2lnL:',alglk,'\n')
# cat(' leaving fun.nrstep\n')
  list(theta=theta,alglk=alglk)
}
##
##
fun.difcomp<-function(theta,fun.fcn,dl,pdelta) {
## Compute an estimate of the observed information matrix by finite
## differences, using the likelihood function (-2ln form) in fun.fcn,
## data in dl.
## Daniel Heitjan, 15 February 1991
## Revised 10 March 1992
# cat('entering fun.difcomp\n')
  np<-length(theta)
  delta<-pdelta*(0.001+abs(theta))
  delta<-(theta+delta)-theta
  grad<-0*c(1:np)
  hess<-0*matrix(0,np,np)
## Gradient estimate and diagonal of the Hessian.
  for (j in 1:np) {
    thetap<-theta
    thetap[j]<-thetap[j]+delta[j]
    fp<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    f0<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    thetap[j]<-thetap[j]-delta[j]
    fm<-(-0.5)*fun.fcn(thetap,dl)
    grad[j]<-(fp-fm)/(2*delta[j])
    hess[j,j]<-(fp-2*f0+fm)/(delta[j])**2
  }
  cat('estimated gradient\n')
  print(grad)
  if (np>1) {
## Off-diagonal elements of the Hessian.
    for (j in 2:np) {
      for (k in 1:(j-1)) {
        thetap<-theta
        thetap[j]<-thetap[j]+delta[j]
        thetap[k]<-thetap[k]+delta[k]
        fpp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]+delta[j]
        thetap[k]<-thetap[k]-delta[k]
        fpm<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]-delta[j]
        thetap[k]<-thetap[k]+delta[k]
        fmp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]-delta[j]
        thetap[k]<-thetap[k]-delta[k]
        fmm<-(-0.5)*fun.fcn(thetap,dl)
        hess[j,k]<-(fpp-fpm-fmp+fmm)/(4*delta[j]*delta[k])
      }
    }
  }
  hess<-t(hess)+hess
  for (j in 1:np) {
    hess[j,j]<-hess[j,j]/2
  }
  cat('diag(hess)\n')
  print(diag(hess))
  cat(' leaving fun.difcomp\n')
  list(d1=grad,d2=hess)
}
##
##
fun.macheps<-function() {
## A function for computing macheps, the machine precision: the
## smallest number tau such that 1+tau>1.  See p. 13 of Dennis and
## Schnabel (1983), *Numerical Methods for Unconstrained Optimization
## and Nonlinear Equations*, Englewood Cliffs, NJ: Prentice-Hall.
## Daniel F. Heitjan, 9 March 1992
  eps<-1
  while ((1+eps)>1) {
    eps<-eps/2
    cat('\n current number is',eps)
  }
  2*eps
}
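##
## An illustrative use of fun.nrstep (fun.egnrstep and egfcn are
## hypothetical): each call takes one damped Newton-Raphson step, so
## repeated calls home in on the minimizer, here dl$m.
fun.egnrstep<-function() {
  egfcn<-function(theta,dl) sum(dl$n*(theta-dl$m)^2)
  dl<-list(n=c(5,5),m=c(1,-2))
  theta<-c(0,0)
  for (i in 1:8) {
    stp<-fun.nrstep(egfcn,dl,theta,1.e-4)
    theta<-stp$theta
  }
  stp
}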
##
##
fun.dsinfo<-function(theta,fun.fcn,dl,scale=1+0*theta,digits=12) {
## Calculate a central difference gradient and hessian approximation
## to -0.5*fun.fcn.  This algorithm assumes that the supplied fun.fcn
## is -2lnL.  It computes the step size as suggested in algorithm
## A5.6.4 on p. 323 of Dennis and Schnabel (1983), *Numerical Methods
## for Unconstrained Optimization and Nonlinear Equations*, Englewood
## Cliffs, NJ: Prentice-Hall.  The step size is
## eta^(1/3)*pmax(theta,scale)*fun.dssign(theta).  Here scale is the
## typical size of theta input by the user, and eta=10^(-digits),
## where digits is the number of reliable base 10 digits in fun.fcn.
## The corresponding elements of the computed and true grad typically
## will agree in about their first (2*digits/3) base 10 digits,
## according to D&S.
## Daniel F. Heitjan, 10 March 1992
## Revised, 02.06.10
# cat('entering fun.dsinfo\n')
  np<-length(theta)
  eta<-10^(-digits)
  delta<-eta^(1/3)*pmax(abs(theta),abs(scale))*fun.dssign(theta)
  delta<-(theta+delta)-theta
  grad<-0*c(1:np)
  hess<-0*matrix(0,np,np)
## Gradient estimate and diagonal of the Hessian.
  for (j in 1:np) {
    thetap<-theta
    thetap[j]<-thetap[j]+delta[j]
    fp<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    f0<-(-0.5)*fun.fcn(thetap,dl)
    thetap<-theta
    thetap[j]<-thetap[j]-delta[j]
    fm<-(-0.5)*fun.fcn(thetap,dl)
    grad[j]<-(fp-fm)/(2*delta[j])
    hess[j,j]<-(fp-2*f0+fm)/(delta[j])**2
  }
# cat('estimated gradient\n')
# print(grad)
  if (np>1) {
## Off-diagonal elements of the Hessian.
    for (j in 2:np) {
      for (k in 1:(j-1)) {
        thetap<-theta
        thetap[j]<-thetap[j]+delta[j]
        thetap[k]<-thetap[k]+delta[k]
        fpp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]+delta[j]
        thetap[k]<-thetap[k]-delta[k]
        fpm<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]-delta[j]
        thetap[k]<-thetap[k]+delta[k]
        fmp<-(-0.5)*fun.fcn(thetap,dl)
        thetap<-theta
        thetap[j]<-thetap[j]-delta[j]
        thetap[k]<-thetap[k]-delta[k]
        fmm<-(-0.5)*fun.fcn(thetap,dl)
        hess[j,k]<-(fpp-fpm-fmp+fmm)/(4*delta[j]*delta[k])
      }
    }
  }
  hess<-(t(hess)+hess)
  for (j in 1:np) {
    hess[j,j]<-hess[j,j]/2
  }
# cat('diag(hess)\n')
# print(diag(hess))
# cat(' leaving fun.dsinfo\n')
  list(d1=grad,d2=hess)
}
##
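##
## A worked example of fun.dsinfo (fun.eginfo and egfcn are
## hypothetical): for the exact -2lnL of a normal mean with known
## unit variance, the information is the sample size n, so the
## standard error returned should be 1/sqrt(n), here 0.5.
fun.eginfo<-function() {
  egfcn<-function(theta,dl) sum((dl$y-theta)^2)
  dl<-list(y=c(0.8,1.1,0.9,1.2))
  info<-(-1)*fun.dsinfo(1.0,egfcn,dl)$d2
  sqrt(diag(solve(info)))
}
##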