# code for athletic records analysis - February 2015 # read data yy=read.table('w3000.txt',header=F) r=5 # liklihood function (compute NLLH - defaults to 10^10 if parameter values infeasible) # par vector is (mu, log psi, xi) lh=function(par){ if(abs(par[2])>20){return(10^10)} #if(abs(par[3])>1){return(10^10)} if(par[3]>=0){return(10^10)} mu=par[1] psi=exp(par[2]) xi=par[3] f=0 for(i in 9:21){ f=f+r*par[2] s1=1+xi*(mu-yy[i,6])/psi if(s1<=0){return(10^10)} s1=-log(s1)/xi if(abs(s1)>20){return(10^10)} f=f+exp(s1) for(j in 2:6){ s1=1+xi*(mu-yy[i,j])/psi if(s1<=0){return(10^10)} f=f+(1+1/xi)*log(s1) }} return(f) } # trial optimization of likelihood function par=c(520,0,-0.01) lh(par) par=c(510,1,-0.1) lh(par) opt1=optim(par,lh,method="Nelder-Mead") opt2=optim(par,lh,method="BFGS") opt3=optim(par,lh,method="CG") opt1$par opt2$par opt3$par opt1$value opt2$value opt3$value # MMLE of endpoint (intepretetd as smallest possible running time) opt1$par[1]+exp(opt1$par[2])/opt1$par[3] opt2$par[1]+exp(opt2$par[2])/opt2$par[3] # now do more through optimization and prepare for MCMC par=c(520,0,-0.01) opt2=optim(par,lh,method="BFGS",hessian=T) library(MASS) A=ginv(opt2$hessian) sqrt(diag(A)) eiv=eigen(A) V=eiv$vectors V=V %*% diag(sqrt(eiv$values)) %*% t(V) # MCMC - adjust nsim=total number of simulations, # nsave=number of iterations between each time data are saved, # nwrite=number of iterations between each time data are written to disk par=opt2$par nsim=5000 nsave=1 nwrite=100 del=1 lh1=lh(par) parsim=matrix(nrow=nsim/nsave,ncol=3) accp=rep(0,nsim) for(isim in 1:nsim){ # Metropolis update step parnew=par+del*V %*% rnorm(3) lh2=lh(parnew) if(runif(1)0]=1-exp(-s1[s1>0]^(-1/parsim[s1>0,3])) s2=1+parsim[,3]*(parsim[,1]-486.11)/exp(parsim[,2]) s2[s2<0]=0 s2[s2>0]=1-exp(-s2[s2>0]^(-1/parsim[s2>0,3])) mean(s2/s1) mean(s2==0) quantile(s2/s1,c(0.5,0.9,0.95,0.975,0.995)) endp=parsim[,1]+exp(parsim[,2])/parsim[,3] sum(endp<486.11)/length(endp) mean(accp) plot(density(endp),xlim=c(460,510)) s3=log10(s2[s2>0]/s1[s2>0]) # results from presaved MCMC output parsim1=matrix(scan('parsim1.txt'),nrow=10000,ncol=3) parsim=parsim1[(length(parsim1[,1])/2+1):length(parsim1[,1]),] s1=1+parsim[,3]*(parsim[,1]-502.62)/exp(parsim[,2]) s1[s1<0]=s1 s1[s1>0]=1-exp(-s1[s1>0]^(-1/parsim[s1>0,3])) s2=1+parsim[,3]*(parsim[,1]-486.11)/exp(parsim[,2]) s2[s2<0]=0 s2[s2>0]=1-exp(-s2[s2>0]^(-1/parsim[s2>0,3])) mean(s2/s1) mean(s2==0) quantile(s2/s1,c(0.5,0.9,0.95,0.975,0.995)) endp=parsim[,1]+exp(parsim[,2])/parsim[,3] sum(endp<486.11)/length(endp) plot(density(endp[endp>460])) s3=log10(s2[s2>0]/s1[s2>0]) plot(density(s3)) # bootstrap sampling for posterior mean probability n1=length(parsim[,1]) nboot=5000 pp=rep(NA,nboot) for(i in 1:nboot){ boot=floor(1+runif(n1)*n1) pp[i]=mean(s2[boot]/s1[boot]) } quantile(pp,c(0.025,0.5,0.975)) # output of bootstrap calculation # 3 times based on nboot=5000: # > quantile(pp,c(0.025,0.5,0.975)) # 2.5% 50% 97.5% # 0.0003271896 0.0004200988 0.0005221467 # 2.5% 50% 97.5% # 0.0003248139 0.0004202982 0.0005221634 # 2.5% 50% 97.5% # 0.0003310929 0.0004222637 0.0005235336