# updated 3/20/2025: R code used for handbook chapter
# \title{Detection and Attribution of Extreme Weather Events: A Statistical Review}
# \author{Richard L. Smith, Department of Statistics and Operations Research,
#   University of North Carolina at Chapel Hill, Chapel Hill, N.C., U.S.A.}
# To appear in:\\
# \textit{Handbook on Statistics of Extremes} \\
# Chapman \& Hall / CRC series on Handbook of Modern Statistical Methods\\
# Miguel de Carvalho, Raphael G. Huser, Philippe Naveau and Brian J Reich \\ (editors)
#
# preliminaries: load gev.test and AdaptMH routines
#
# gev.test: parametric-bootstrap goodness-of-fit test for a gev.fit model.
# Transforms the data to uniform via the fitted GEV, computes a QQ-correlation (LG),
# Kolmogorov-Smirnov (KS), Cramer-von Mises (CVM) and Anderson-Darling (AD) statistic,
# and obtains p-values by refitting the model to nsim samples simulated from the fit.
gev.test=function(xdat, ydat = NULL, mul = NULL, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = NULL, siginit = NULL, shinit = NULL,
  show = FALSE, method = "Nelder-Mead", maxit = 10000, nsim){
fit1=gev.fit(xdat, ydat = ydat, mul = mul, sigl = sigl, shl = shl,
  mulink = mulink, siglink = siglink, shlink = shlink,
  muinit = muinit, siginit = siginit, shinit = shinit,
  show = show, method = method, maxit = maxit)
u1=sort(exp(-(1+fit1$vals[,3]*(xdat-fit1$vals[,1])/fit1$vals[,2])^(-1/fit1$vals[,3])))
n=length(u1)
plotpos=qnorm((1:n-0.5)/n)
lg=cor(qnorm(u1),plotpos)
d1<-max(c((1:n)/n-u1,u1-(0:(n-1))/n))
w1<-sum((u1-((1:n)-0.5)/n)^2)+1/(12*n)
a1<--sum((2*(1:n)-1)*log(u1)+(2*n+1-2*(1:n))*log(1-u1))/n-n
count=rep(0,4)
for(isim in 1:nsim){
# simulate from the fitted model via the GEV quantile transform and refit
ysim=fit1$vals[,1]+fit1$vals[,2]*((-log(runif(n)))^(-fit1$vals[,3])-1)/fit1$vals[,3]
fit2=gev.fit(ysim, ydat = ydat, mul = mul, sigl = sigl, shl = shl,
  mulink = mulink, siglink = siglink, shlink = shlink,
  muinit = muinit, siginit = siginit, shinit = shinit,
  show = show, method = method, maxit = maxit)
u2=sort(exp(-(1+fit2$vals[,3]*(ysim-fit2$vals[,1])/fit2$vals[,2])^(-1/fit2$vals[,3])))
lg2=0
if(!is.na(sum(u2)))lg2=cor(qnorm(u2),plotpos)
d2<-max(c((1:n)/n-u2,u2-(0:(n-1))/n))
w2<-sum((u2-((1:n)-0.5)/n)^2)+1/(12*n)
a2<--sum((2*(1:n)-1)*log(u2)+(2*n+1-2*(1:n))*log(1-u2))/n-n
if(lg2<lg)count[1]<-count[1]+1
if(d2>d1)count[2]<-count[2]+1
if(w2>w1)count[3]<-count[3]+1
if(a2>a1)count[4]<-count[4]+1
}
ddf=matrix(c(lg,d1,w1,a1,count/nsim),byrow=T,ncol=4)
ddf=round(ddf,5)
ddf=cbind(c('Test','p-value'),ddf)
ddf=rbind(c('','LG','KS','CVM','AD'),ddf)
return(ddf)
}
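# Illustration (added, not part of the original routines): a quick sanity check of gev.test
# on simulated stationary GEV data, left commented out so that these preliminaries stay
# side-effect free. The seed, sample size, GEV parameters and nsim are arbitrary choices;
# requires the ismev package (loaded again further below).
#library(ismev)
#set.seed(1)
#xsim=40+3*((-log(runif(80)))^(0.1)-1)/(-0.1)  # GEV(mu=40,sigma=3,xi=-0.1) via the quantile transform
#gev.test(xsim,nsim=200)                       # p-values should be unremarkable when the model is correct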
# program to implement the Adaptive Metropolis algorithm of Haario et al.
#
# Reference:
# An adaptive Metropolis algorithm, by H. Haario, E. Saksman and J. Tamminen,
# Bernoulli 7(2), 2001, 223-242
#
AdaptMH=function(lh,par,npar,C0,nsim,n1,n2,n3,eps,scal){
#
# lh: neg log likelihood function to be called
# par: initial set of parameter estimates
# npar: dimension of par
# C0: npar x npar matrix, initial guess of covariance matrix
# nsim: total number of simulations
# n1: initial run with given C0
# n2: interval between subsequent updates
# n3: interval between updates of output parameter matrix
# eps: small positive constant; eps times the identity is added to the proposal covariance
# scal: scaling parameter, recommended value is 2.4 but user is allowed to change
# sd parameter is scal*scal/npar, see Haario 2001, Gelman et al 1996
# parout: output parameter matrix of dimension n4 x npar, where n4=floor(nsim/n3)
#
n4=floor(nsim/n3)
parout=matrix(nrow=n4,ncol=npar)
C=C0
sum1=par
sum2=par %*% t(par)
sd=scal*scal/npar
ID=matrix(0,nrow=npar,ncol=npar)
diag(ID)=rep(1,npar)
parold=par
lhold=lh(par)
acc0=0
for(isim in 1:nsim){
if(isim==n1){
# update C and xbar
xbar=sum1/(n1+1)
C=(sum2-(n1+1)*xbar%*%t(xbar))/n1
sum1=rep(0,npar)
sum2=matrix(0,nrow=npar,ncol=npar)
}
if(isim>n1&(n2*floor((isim-n1)/n2)==(isim-n1))){
# update C and xbar
sum2=sum2+(isim-n2+1)*xbar%*%t(xbar)
xbar=((isim-n2+1)*xbar+sum1)/(isim+1)
C=(isim-n2)*C/isim+(sum2-(isim+1)*xbar%*%t(xbar))/isim
sum1=rep(0,npar)
sum2=matrix(0,nrow=npar,ncol=npar)
}
# generate new trial value
parnew=as.vector(parold+t(chol(sd*C+sd*eps*ID))%*%rnorm(npar))
lu=log(runif(1))
lhnew=lh(parnew)
if(lhnew-lhold+lu<0){
parold=parnew
lhold=lhnew
acc0=acc0+1
}
#print(c('acc0 = ',acc0,' out of ',isim))
iout=floor(isim/n3)
if(n3*iout==isim){
# add current parameter vector to output matrix
parout[iout,]=parold
}
sum1=sum1+parold
sum2=sum2+parold%*%t(parold)
}
return(value=list(parout=parout,acc0=acc0))
}
# end of installation
#
# annual maxima analysis
#
TX=matrix(scan('https://rls.sites.oasis.unc.edu/s834-2023/Data/TX.txt'),byrow=T,ncol=2)
RM=matrix(scan('https://rls.sites.oasis.unc.edu/s834-2023/Data/RM.txt'),byrow=T,ncol=2)
gmst=matrix(scan('https://rls.sites.oasis.unc.edu/s834-2023/Data/gmst.txt'),byrow=T,ncol=2)
library(ismev)
# first analysis: GEV fit with GMST covariate, omit 2021
gg1=gev.fit(TX[1:71,2], ydat = gmst[72:142,], mul = 2, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(mean(TX[1:71,2]),0), siginit = 1.5*sd(TX[1:71,2]), shinit = -0.1,
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,4))
# ismev diagnostics
gev.diag(gg1)
# test of fit
gev.test(TX[1:71,2], ydat = gmst[72:142,], mul = 2, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(mean(TX[1:71,2]),0), siginit = 1.5*sd(TX[1:71,2]), shinit = -0.1,
  show = FALSE, method = "Nelder-Mead", maxit = 10000, nsim=10000)
# add variable scale parameter (not significant)
gg2=gev.fit(TX[1:71,2], ydat = gmst[72:142,], mul = 2, sigl = 2, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(mean(TX[1:71,2]),0), siginit = c(1.5*sd(TX[1:71,2]),0), shinit = -0.1,
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,4))
# likelihood ratio (anova) test of gg2 against gg1
pchisq(2*(gg1$nllh-gg2$nllh),1,lower.tail=F)
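# Illustration (added, not from the original script): plug-in estimates of the probability
# of exceeding 39.6 C (the 2021 value analysed below) implied by the gg1 fit, under the
# 2021 GMST and under GMST reduced by 1.2 C (the preindustrial offset used later), plus the
# implied risk ratio. gev.exc is a small helper defined here only for this illustration;
# it is just the frequentist counterpart of the Bayesian calculation further down.
gev.exc=function(fit,g,level){
  a=1+fit$mle[4]*(level-fit$mle[1]-fit$mle[2]*g)/fit$mle[3]
  if(a<=0)return(ifelse(fit$mle[4]<0,0,1))   # outside the support: above the upper endpoint when xi<0
  1-exp(-a^(-1/fit$mle[4]))
}
pF=gev.exc(gg1,gmst[143,2],39.6)       # factual (2021 GMST)
pC=gev.exc(gg1,gmst[143,2]-1.2,39.6)   # counterfactual (GMST lower by 1.2 C)
c(pF,pC,pF/pC)                         # exceedance probabilities and risk ratio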
# repeat with 2021 included
gg1a=gev.fit(TX[1:72,2], ydat = gmst[72:143,], mul = 2, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(mean(TX[1:71,2]),0), siginit = 1.5*sd(TX[1:71,2]), shinit = -0.1,
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,4))
gev.diag(gg1a)
gev.test(TX[1:72,2], ydat = gmst[72:143,], mul = 2, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(mean(TX[1:71,2]),0), siginit = 1.5*sd(TX[1:71,2]), shinit = -0.1,
  show = FALSE, method = "Nelder-Mead", maxit = 10000, nsim=10000)
# endpoint estimation: point estimate and delta-method standard error
gg1$mle[1]+gg1$mle[2]*gmst[143,2]-gg1$mle[3]/gg1$mle[4]
gr1=c(1,gmst[143,2],-1/gg1$mle[4],gg1$mle[3]/gg1$mle[4]^2)
sqrt(gr1%*%gg1$cov%*%gr1)
# endpoint estimation under preindustrial GMST (2021 value minus 1.2)
gg1$mle[1]+gg1$mle[2]*(gmst[143,2]-1.2)-gg1$mle[3]/gg1$mle[4]
gr1=c(1,gmst[143,2]-1.2,-1/gg1$mle[4],gg1$mle[3]/gg1$mle[4]^2)
sqrt(gr1%*%gg1$cov%*%gr1)
# repeat with new datapoint included
# endpoint estimation
gg1a$mle[1]+gg1a$mle[2]*gmst[143,2]-gg1a$mle[3]/gg1a$mle[4]
gr1=c(1,gmst[143,2],-1/gg1a$mle[4],gg1a$mle[3]/gg1a$mle[4]^2)
sqrt(gr1%*%gg1a$cov%*%gr1)
# endpoint estimation under preindustrial GMST
gg1a$mle[1]+gg1a$mle[2]*(gmst[143,2]-1.2)-gg1a$mle[3]/gg1a$mle[4]
gr1=c(1,gmst[143,2]-1.2,-1/gg1a$mle[4],gg1a$mle[3]/gg1a$mle[4]^2)
sqrt(gr1%*%gg1a$cov%*%gr1)
# likelihood function (computes the NLLH; returns 10^10 if parameter values are infeasible)
#
# data set in y, single covariate in x
y=TX[1:71,2]
x=gmst[72:142,2]
nn=length(y)
lh1=function(par){
if(abs(par[2])>20){return(10^10)}
if(abs(par[3])>1){return(10^10)}
#if(par[3]>=0){return(10^10)}
mu=par[1]+par[4]*x
sig=rep(exp(par[2]),nn)   # par[2] is log(sigma)
xi=rep(par[3],nn)
a1=1+xi*(y-mu)/sig
if(any(a1<=0))return(10^10)
sum(log(sig))+sum(a1^(-1/xi))+sum(log(a1)*(1/xi+1))
}
par0=c(mean(y),sd(y),-0.01,0)
lh1(par0)
opt2=optim(par0,lh1,method="BFGS",control=list(ndeps=rep(1e-6,length(par0)),maxit=10000),hessian=T)
# check results are the same (modulo log transformation of sigma)
opt2$par
sqrt(diag(solve(opt2$hessian)))
gg1$mle
gg1$se
# use the AdaptMH routine loaded above
# this is just a trial run - show how it works
par=opt2$par
npar=length(par)
C0=solve(opt2$hessian)
nsim=2000000
nn1=nsim/10
nn2=50
nn3=10
eps=0.001
scal=2.4
set.seed(123)
T1=Sys.time()
bay1=AdaptMH(lh1,par,npar,C0,nsim,nn1,nn2,nn3,eps,scal)
T2=Sys.time()
print(T2-T1)
print(bay1$acc0/nsim)
# discard first half of chain as warmup
parout=bay1$parout[(nsim/20+1):(nsim/10),]
# alternatively, preload stored values
parout=read.csv('https://rls.sites.oasis.unc.edu/s834-2023/Data/parout1.csv',header=F)
par(mfrow=c(2,2))
plot(1:10000,parout[10*(1:10000),1],type='l',xlab='Iteration',ylab='beta0')
plot(1:10000,parout[10*(1:10000),2],type='l',xlab='Iteration',ylab='log sigma')
plot(1:10000,parout[10*(1:10000),3],type='l',xlab='Iteration',ylab='xi')
plot(1:10000,parout[10*(1:10000),4],type='l',xlab='Iteration',ylab='beta1')
# posterior samples of the upper endpoint under the 2021 and preindustrial GMST
ep1=parout[,1]+gmst[143,2]*parout[,4]-exp(parout[,2])/parout[,3]
ep2=parout[,1]+(gmst[143,2]-1.2)*parout[,4]-exp(parout[,2])/parout[,3]
mean(ep1>39.6)
mean(ep2>39.6)
mean(ep1)
mean(ep2)
# restrict to "sensible" values
# (this block and the start of the next were garbled in the source file; they are
#  reconstructed here, with the bound 300 and crit=39.6 taken as plausible values
#  consistent with the rest of the script)
ep1=ep1[ep1<300]
ep2=ep2[ep2<300]
mean(ep1)
mean(ep2)
# posterior exceedance probabilities of crit under the 2021 and preindustrial GMST,
# and the risk ratio PC/PF (mirrors the compilation loop below)
crit=39.6
aa=1+parout[,3]*(crit-parout[,1]-gmst[143,2]*parout[,4])/exp(parout[,2])
aa[aa<0&parout[,3]<0]=0
aa[aa<0&parout[,3]>0]=-1
aa[aa>0]=1-exp(-aa[aa>0]^(-1/parout[aa>0,3]))
aa[aa<(-0.5)]=1
aa1=aa
aa=1+parout[,3]*(crit-parout[,1]-(gmst[143,2]-1.2)*parout[,4])/exp(parout[,2])
aa[aa<0&parout[,3]<0]=0
aa[aa<0&parout[,3]>0]=-1
aa[aa>0]=1-exp(-aa[aa>0]^(-1/parout[aa>0,3]))
aa[aa<(-0.5)]=1
aa2=aa/aa1
mean(aa)
mean(aa1)
aa2[aa1==0]=0
mean(aa2)
mean(aa2[parout[,4]>0])
mean(parout[,4]>0)
mean(aa2<0.01)
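# Illustration (added, not from the original script): posterior quantiles of the factual
# and counterfactual exceedance probabilities and of the risk ratio PC/PF at crit=39.6,
# complementing the posterior means above; the quantile levels are arbitrary choices.
quantile(aa1,c(0.025,0.5,0.975))   # factual P(exceed crit)
quantile(aa,c(0.025,0.5,0.975))    # counterfactual P(exceed crit)
quantile(aa2,c(0.025,0.5,0.975))   # risk ratio PC/PF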
# compilation of Bayesian outputs over a range of temperature thresholds
bays=matrix(ncol=7,nrow=9)
for(j in 1:7){
crit=32+j
bays[1,j]=crit
nbay=nrow(parout)
aa=1+parout[,3]*(crit-parout[,1]-gmst[143,2]*parout[,4])/exp(parout[,2])
aa[aa<0&parout[,3]<0]=0
aa[aa<0&parout[,3]>0]=-1
aa[aa>0]=1-exp(-aa[aa>0]^(-1/parout[aa>0,3]))
aa[aa<(-0.5)]=1
aa1=aa
aa=1+parout[,3]*(crit-parout[,1]-(gmst[143,2]-1.2)*parout[,4])/exp(parout[,2])
aa[aa<0&parout[,3]<0]=0
aa[aa<0&parout[,3]>0]=-1
aa[aa>0]=1-exp(-aa[aa>0]^(-1/parout[aa>0,3]))
aa[aa<(-0.5)]=1
aa2=aa/aa1
bays[2,j]=mean(aa)
bays[3,j]=mean(aa1)
aa2[aa1==0]=0
#sum(aa2)/nbay
mean(aa2)
bays[4,j]=mean(aa2[parout[,4]>0])
bays[5,j]=sum(aa2<0.01)/nbay
bays[6,j]=sum(aa2<0.001)/nbay
bays[7,j]=sum(aa2<0.0001)/nbay
bays[8,j]=sum(aa2<0.00001)/nbay
bays[9,j]=sum(aa2<0.000001)/nbay
}
bays=round(bays,8)
bays=data.frame(t(bays))
names(bays)=c('Temp','E{PC}','E{PF}','E{PC/PF}','P(RR<0.01)','P(RR<0.001)','P(RR<0.0001)','P(RR<0.00001)','P(RR<0.000001)')
bays
###################################################
# Threshold methods
### process daily data
par(mfrow=c(1,1))
D3=read.delim('https://climexp.knmi.nl/data/itmax_era5new_index.dat')
# alternate:
#D3=read.delim('https://rls.sites.oasis.unc.edu/s834-2023/Data/itmax_era5new_index.dat.txt')
D3=D3[23:26198,1]
Year=as.numeric(substr(D3,1,4))
Mon=as.numeric(substr(D3,5,6))
Day=as.numeric(substr(D3,7,8))
Tmax=as.numeric(substr(D3,11,20))
DOY=Day+c(0,31,59,90,120,151,181,212,243,273,304,334)[Mon]
DOY[round(Year/4)==Year/4]=DOY[round(Year/4)==Year/4]+1
# DOY averages
tmn=rep(NA,365)
for(dd in 1:365){tmn[dd]=mean(Tmax[DOY==dd&Year<2021])}
plot(1:365,tmn,type='l')
# study shape of within-year temperatures
x=1:365
reg1=lm(tmn~I(cos(2*pi*x/365))+I(sin(2*pi*x/365)))
reg2=lm(tmn~I(cos(2*pi*x/365))+I(sin(2*pi*x/365))+I(cos(4*pi*x/365))+I(sin(4*pi*x/365)))
reg3=lm(tmn~I(cos(2*pi*x/365))+I(sin(2*pi*x/365))+I(cos(4*pi*x/365))+I(sin(4*pi*x/365))+I(cos(6*pi*x/365))+I(sin(6*pi*x/365)))
anova(reg1,reg2,test='F')
anova(reg2,reg3,test='F')
lines(x,reg2$fitted)
lines(x,reg1$fitted,lty=2)
# conclusion: a single sinusoid does not fit the data, a combination of two sinusoids does
# similar study for annual means
tmy=rep(NA,71)
for(iy in 1:71){tmy[iy]=mean(Tmax[Year==1949+iy])}
plot(1:71,tmy,type='l')
library(splines)
sp0=1:71
sp1=ns(1:71,2)
sp2=ns(1:71,3)
sp3=ns(1:71,4)
sp4=ns(1:71,5)
lm0=lm(tmy~sp0)
lm1=lm(tmy~sp1)
lm2=lm(tmy~sp2)
lm3=lm(tmy~sp3)
lm4=lm(tmy~sp4)
anova(lm0,lm1,test='F')
anova(lm0,lm2,test='F')
anova(lm0,lm3,test='F')
anova(lm0,lm4,test='F')
lines(sp0,lm0$fitted)
summary(lm0)
# overall linear trend does fine
# define time-varying threshold via quantile regression
library(quantreg)
library(splines)
yy=1:26176
qq=0.9
uu=rq(Tmax~I(cos(2*pi*DOY/365))+I(sin(2*pi*DOY/365))+I(cos(4*pi*DOY/365))+I(sin(4*pi*DOY/365))+Year,tau=qq)$fitted
for(j in 1:12){print(mean(Tmax[Mon==j]>uu[Mon==j]))}
for(j in 1950:2020){print(mean(Tmax[Year==j]>uu[Year==j]))}
# calculate extremal index and decluster
library(texmex)
ei=extremalIndex(Tmax,threshold=uu)
ei$EIintervals
ydc=declust(ei)
# look at first part of output
ydc$exceedanceTimes[1:100]
ydc$isClusterMax[1:100]
Tmax[ydc$exceedanceTimes][1:100]
# keep only cluster maxima: set all other exceedance days to a very low value
y=Tmax
y[ydc$exceedanceTimes[ydc$isClusterMax==F]]=-1000
z=gmst[Year-1882]
z1=RM[Year-1849,2]
# find when the high values occurred
crit=29
mct=rep(0,12)
for(j in 1:12){mct[j]=sum(y>crit&Mon==j)}
mct
sum(y>crit&DOY<135)+sum(y>crit&DOY>258)
# conclusion: all high values occur between days 135 and 255 of the year (about May 15 to Sep 15)
# restrict to DOY between 135 and 255, fit point process model
u1=rep(0,26176)
u1[1:25933]=1   # use data through 2020 only (days 1 to 25933)
u1[DOY<135]=0
u1[DOY>255]=0
sum(u1)
# first model: no covariates
pp0=pp.fit(y[which(u1==1)], thr=uu[which(u1==1)], npy = 121, ydat =NULL, mul = NULL, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = 30, siginit = 4, shinit = 0.0001,
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,4))
# nllh = 384.5786
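# Illustration (added, not from the original script): with npy=121, the parameters of the
# stationary fit pp0 are those of the GEV distribution of the seasonal (121-day) maximum,
# so the implied probability that a season's maximum exceeds a level is the usual GEV tail.
# crit=39.6 is the 2021 value used elsewhere in this script; this is a plug-in estimate only.
crit=39.6
a0=1+pp0$mle[3]*(crit-pp0$mle[1])/pp0$mle[2]
p0=ifelse(a0>0,1-exp(-a0^(-1/pp0$mle[3])),as.numeric(pp0$mle[3]>0))
p0   # P(seasonal maximum > 39.6) under the stationary fit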
# second model: covariates in mu, not in sigma or xi
X=cbind(gmst[Year-1878,2],cos(2*pi*DOY/365),sin(2*pi*DOY/365),cos(4*pi*DOY/365),sin(4*pi*DOY/365))
pp1=pp.fit(y[which(u1==1)], thr=uu[which(u1==1)], npy = 121, ydat =X[which(u1==1),], mul = 1:5, sigl = NULL, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = c(pp0$mle[1],0,0,0,0,0), siginit = pp0$mle[2], shinit = pp0$mle[3],
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,8))
# nllh = 332.3046
# third model: add 12-month sinusoid term in sigma
pp2=pp.fit(y[which(u1==1)], thr=uu[which(u1==1)], npy = 121, ydat =X[which(u1==1),], mul = 1:5, sigl = 2:3, shl = NULL,
  mulink = identity, siglink = identity, shlink = identity,
  muinit = pp1$mle[1:6], siginit = c(pp1$mle[7],0,0), shinit = pp1$mle[8],
  show = TRUE, method = "Nelder-Mead", maxit = 10000)
#method = "BFGS", maxit = 10000,ndeps=rep(1e-6,10))
# nllh = 322.7706
# pp2 much better than pp1 but could still try other models
# goodness of fit to the actual data (model pp1)
rrc=matrix(nrow=10,ncol=4)
for(i in 1:10){
crit=29+0.5*i
ss1=X[which(u1==1),]%*%pp1$mle[2:6]+pp1$mle[1]
ss2=1+pp1$mle[8]*(crit-ss1)/pp1$mle[7]
ss2[ss2<=0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/pp1$mle[8])
rrc[i,1]=crit
rrc[i,2]=sum(ss2)/121
rrc[i,3]=sum(y[1:25933]>crit)
rrc[i,4]=(rrc[i,3]*log(rrc[i,3]/rrc[i,2]))-rrc[i,3]+rrc[i,2]
}
rrc=data.frame(rrc)
names(rrc)=c('Temp','Expected','Observed','LRT')
rrc
# goodness of fit to the actual data (model pp2)
rrc=matrix(nrow=10,ncol=4)
for(i in 1:10){
crit=29+0.5*i
ss1=X[which(u1==1),]%*%pp2$mle[2:6]+pp2$mle[1]
sc=X[which(u1==1),2:3]%*%pp2$mle[8:9]+pp2$mle[7]
ss2=1+pp2$mle[10]*(crit-ss1)/sc
ss2[ss2<=0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/pp2$mle[10])
rrc[i,1]=crit
rrc[i,2]=sum(ss2)/121
rrc[i,3]=sum(y[1:25933]>crit)
rrc[i,4]=(rrc[i,3]*log(rrc[i,3]/rrc[i,2]))-rrc[i,3]+rrc[i,2]
}
rrc=data.frame(rrc)
names(rrc)=c('Temp','Expected','Observed','LRT')
rrc
# both models show good fit for Temp>=30, but pp2 is better
# expected number of exceedances of level crit when GMST=gmst[143,2]=0.9329167
dd=135:255
for(i in 1:7){
crit=32.5+0.5*i
crit=36+0.1*i   # note: this second assignment overrides the first
X1=cbind(rep(1,121),rep(gmst[143,2],121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
ss1=X1%*%pp2$mle[1:6]
sc=X1[,3:4]%*%pp2$mle[8:9]+pp2$mle[7]
ss2=1+pp2$mle[10]*(crit-ss1)/sc
ss2[ss2<=0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/pp2$mle[10])
prF=sum(ss2)/121
X1=cbind(rep(1,121),rep(gmst[143,2]-1.2,121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
ss1=X1%*%pp2$mle[1:6]
sc=X1[,3:4]%*%pp2$mle[8:9]+pp2$mle[7]
ss2=1+pp2$mle[10]*(crit-ss1)/sc
ss2[ss2<=0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/pp2$mle[10])
prC=sum(ss2)/121
print(c(crit,prF,prC,prF/prC))
}
# comment: these results are a little bit more satisfactory than the GEV results, e.g. the
# estimated endpoints are higher under this model, but still below the 2021 value;
# the basic problem does not go away
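# Illustration (added, not from the original script): the upper endpoint mu - sigma/xi
# implied by model pp2 across the seasonal window at the 2021 GMST, making the
# "estimated endpoints" remark above explicit; a plug-in estimate only, with no
# uncertainty attached, and meaningful only because the fitted xi is negative.
dd=135:255
X1=cbind(rep(1,121),rep(gmst[143,2],121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
mu.d=X1%*%pp2$mle[1:6]
sc.d=X1[,3:4]%*%pp2$mle[8:9]+pp2$mle[7]
max(mu.d-sc.d/pp2$mle[10])   # highest implied endpoint across the season, to compare with 39.6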
# program likelihood for model pp2: we want to check the MLE calculation and then compute the Bayesian extension
# the likelihood function is derived from the pp.lik function within pp.fit
yy=y[which(u1==1)]
thr=uu[which(u1==1)]
X1=X[which(u1==1),]
ny=length(yy)
npy=121
pplh2=function(par){
mu=rep(par[1],ny)+X1[1:ny,]%*%par[2:6]
sig=X1[1:ny,2:3]%*%par[8:9]+par[7]
xi=rep(par[10],ny)
uInd=yy[1:ny]>thr[1:ny]
u=thr[1:ny]
if(min(sig)<0)return(10^10)
if (min((1 + ((xi * (u - mu))/sig))^uInd) < 0) {l=10^10}
else {
y1 <- (yy[1:ny] - mu)/sig
y1 <- 1 + xi * y1
if (min(y1) <= 0) l=10^10
else l <- sum(uInd * log(sig)) + sum(uInd * log(y1) * (1/xi + 1)) + ny/npy * mean((1 + (xi * (u - mu))/sig)^(-1/xi))
}
l
}
# reality check: we get the same MLE as pp.fit
par=c(pp1$mle[1:7],0,0,pp1$mle[8])
pplh2(par)
opt2=optim(par,pplh2,method="BFGS",control=list(ndeps=rep(1e-6,length(par)),maxit=10000),hessian=T)
opt2=optim(par,pplh2,method="Nelder-Mead",control=list(maxit=10000),hessian=T)
opt2$par
sqrt(diag(solve(opt2$hessian)))
pp2$mle
pp2$se
# model fits agree
# now do the Bayesian analysis (reduce nsim to 20000 for a quick demonstration; the run
# below uses nsim=200000, which took about 12 minutes)
par=opt2$par
npar=length(par)
C0=solve(opt2$hessian)
C0=pp2$cov   # use the pp.fit covariance estimate (overrides the line above)
nsim=200000
nn1=nsim/10
nn2=50
nn3=10
eps=0.001
scal=2.4
set.seed(123)
T1=Sys.time()
bay1=AdaptMH(pplh2,par,npar,C0,nsim,nn1,nn2,nn3,eps,scal)
T2=Sys.time()
print(T2-T1)
print(bay1$acc0/nsim)
# discard first half of chain as warmup
parout=bay1$parout[(nrow(bay1$parout)/2+1):nrow(bay1$parout),]
# trace plots: start with j=0, then repeat the next two lines to step through the parameters
j=0
j=j+1
plot(parout[,j],type='l')
for(j in 1:10){print(c(mean(parout[,j]),sd(parout[,j])))}
for(j in 1:10){print(c(pp2$mle[j],pp2$se[j]))}
# good agreement between Bayesian and MLE results (they aren't supposed to be identical)
# alternatively (load precalculated values)
parout2=read.csv('https://rls.sites.oasis.unc.edu/s834-2023/Data/parout2.csv',header=F)
for(j in 1:10){print(c(mean(parout2[,j]),sd(parout2[,j])))}
# expected number of exceedances of level crit when GMST=gmst[143,2]=0.9329167
for(i in 1:6){
crit=33+i
dd=135:255
X1=cbind(rep(1,121),rep(gmst[143,2],121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
#X1%*%parout[1,1:6]
ss1=X1%*%t(parout[,1:6])
sc=X1[,c(1,3,4)]%*%t(parout[,7:9])
sh=t(matrix(rep(parout[,10],121),ncol=121))
ss2=1+sh*(crit-ss1)/sc
ss2[ss2<0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/sh[ss2>0])
ss2a=rep(1/121,121)%*%ss2
X1=cbind(rep(1,121),rep(gmst[143,2]-1.2,121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
#X1%*%parout[1,1:6]
ss1=X1%*%t(parout[,1:6])
sc=X1[,c(1,3,4)]%*%t(parout[,7:9])
sh=t(matrix(rep(parout[,10],121),ncol=121))
ss2=1+sh*(crit-ss1)/sc
ss2[ss2<0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/sh[ss2>0])
ss2b=rep(1/121,121)%*%ss2
ss2c=ss2b/ss2a
ss2c[ss2a==0]=0
#print(c(crit,mean(ss2a),mean(ss2b),mean(ss2c)))
#quantile(ss2c,c(0.025,0.975))
#print(c(mean(ss2c<0.1),mean(ss2c<0.01),mean(ss2c<0.001)))
print(c(crit,mean(ss2a),mean(ss2b),mean(ss2c),quantile(ss2c,c(0.025,0.975)),mean(ss2c<0.01)))
}
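# Illustration (added, not from the original script): the same posterior calculation at the
# single level crit=39.6 (the 2021 value), using the stored posterior sample parout2 loaded
# above, so the point-process results can be compared directly with the GEV-based Bayesian
# results earlier in the script; the structure mirrors the loop immediately above.
crit=39.6
dd=135:255
X1=cbind(rep(1,121),rep(gmst[143,2],121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
ss1=X1%*%t(parout2[,1:6])
sc=X1[,c(1,3,4)]%*%t(parout2[,7:9])
sh=t(matrix(rep(parout2[,10],121),ncol=121))
ss2=1+sh*(crit-ss1)/sc
ss2[ss2<0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/sh[ss2>0])
ss2a=rep(1/121,121)%*%ss2              # posterior draws of the factual expected exceedances per season
X1=cbind(rep(1,121),rep(gmst[143,2]-1.2,121),cos(2*pi*dd/365),sin(2*pi*dd/365),cos(4*pi*dd/365),sin(4*pi*dd/365))
ss1=X1%*%t(parout2[,1:6])
sc=X1[,c(1,3,4)]%*%t(parout2[,7:9])
ss2=1+sh*(crit-ss1)/sc
ss2[ss2<0]=0
ss2[ss2>0]=ss2[ss2>0]^(-1/sh[ss2>0])
ss2b=rep(1/121,121)%*%ss2              # counterfactual expected exceedances per season
ss2c=ss2b/ss2a
ss2c[ss2a==0]=0
c(mean(ss2a),mean(ss2b),mean(ss2c))    # posterior means: factual, counterfactual, ratio PC/PF
mean(ss2c<0.01)                        # posterior probability that the ratio is below 1/100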