##
## Part 1: R preliminaries
##

# See the path for the current working directory.
getwd()

# Creating and assigning vectors.
x <- c( -1, 4, 0)

# Creating and assigning matrices.
y <- cbind( c(2, 1, 5), c(3, 7, 9))

# Save the current workspace.
save.image()

# Quit R with and without saving the workspace, and interactively.
# q("yes")
# q("no")
# q()

#
# Subsetting vectors and matrices.
#
x[3]
x[1:2]
x[c(1,3)]
x[-2]
y[1,]
y1 <- y[,1]
y2 <- y[,2]
y[2,1] <- NA

#
# Logicals and missing values.
#
any( is.na( x))
any( is.na( y))
y[ is.na( y)] <- -999.0
x == 0

#
# Contributed Packages.
#

# Install some packages. This need only be done once (per new version of a package).
install.packages( c("fields", "evd", "evdbayes", "ismev", "maps", "SpatialExtremes"))

# Load the packages into R. This must be done in every new R session
# in which the package is to be used.
library( fields)
library( evd)
library( evdbayes)
library( ismev)
library( SpatialExtremes)
# Note the warnings; they could be a problem.

# Find the position of the 'SpatialExtremes' package and detach it for now.
search() # it is in position 2.
detach( pos=2)

# Check that it is no longer loaded.
search()

# See how to reference the 'fields' package in a publication.
citation("fields")

#
# Simulate some random fields.
#
# help( sim.rf)
grid <- list( x=seq( 0, 5,, 100), y=seq( 0, 5,, 100))
obj <- Exp.image.cov( grid=grid, theta=0.5, setup=TRUE)
look <- sim.rf( obj)
look2 <- sim.rf( obj)
set.panel( 2, 1)
image.plot( grid$x, grid$y, look)
title("Simulated Gaussian field")
image.plot( grid$x, grid$y, look2)
title("Another (independent) realization ...")

#
# Plotting in R.
#
X11()
z <- rnorm( 10) # sample of size 10 from a Normal(0,1) distribution.
plot( 1:10, z, type="l", xlab="", ylab="z", main="Std Normal Random Sample")
points( 1:10, z, col="red", pch="s", cex=2)
lines( 1:10, rnorm( 10), col="blue", lwd=2, lty=2)

# Normal Q-Q plot (to check for normality of z).
qqnorm( z)

# Close the plotting device.
dev.off()

##
## Part 2: Extreme Value Analysis
##

#
# Background for EVA.
#
require( evd)
require( extRemes) # will also load 'ismev' automatically.

# Simulations.

# Uniform(0,1) distribution.
U <- runif( 1000)
hist( U)

# Normal(0,1) distribution.
Z <- rnorm( 1000)
hist( Z)

# Gumbel distribution.
# require( evd)
M <- rgev( 1000)
hist( M)

# Simulate 1000 maxima from samples of size 30 from the
# normal distribution.
Zmax <- matrix( NA, 30, 1000)
dim( Zmax)
for( i in 1:1000) Zmax[,i] <- rnorm( 30)
Zmax <- apply( Zmax, 2, max)
hist( Zmax, breaks="FD", col="blue")

# Some properties of the 'Zmax' object.
dim( Zmax)
class( Zmax)
length( Zmax)

# Simulate 1000 maxima from samples of size 30 from the
# Uniform(0,1) distribution.
Umax <- matrix( NA, 30, 1000)
for( i in 1:1000) Umax[,i] <- runif( 30)
Umax <- apply( Umax, 2, max)
hist( Umax, breaks="FD", col="blue")

# Simulate 1000 maxima from samples of size 30 from the
# Frechet distribution.
Fmax <- matrix( NA, 30, 1000)
# require( evd)
for( i in 1:1000) Fmax[,i] <- rgev( 30, loc=2, scale=2.5, shape=0.5)
Fmax <- apply( Fmax, 2, max, na.rm=TRUE)
hist( Fmax, breaks="FD", col="blue")

# Simulate 1000 maxima from samples of size 30 from the
# exponential distribution.
Emax <- matrix( NA, 30, 1000)
for( i in 1:1000) Emax[,i] <- rexp( 30)
Emax <- apply( Emax, 2, max)
hist( Emax, breaks="FD", col="blue")

# Compare 'Emax' with samples from the exponential distribution.
par( mfrow=c(1,2))
hist( Emax, breaks="FD", col="blue")
hist( rexp( 1000), breaks="FD", col="blue")
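#
# A quick sanity check (sketch): the histograms above illustrate the extremal
# types theorem, so a GEV fit to the simulated normal maxima 'Zmax' should
# return a shape estimate near 0 (the Gumbel limit). This uses evd's 'fgev';
# the object names 'zfit' and 'xg' are illustrative, not part of the original
# script.
#
zfit <- fgev( Zmax) # maximum likelihood fit of the GEV to the maxima.
zfit$estimate       # the shape estimate should be close to 0.
hist( Zmax, breaks="FD", col="lightblue", freq=FALSE)
xg <- seq( min( Zmax), max( Zmax), length=200)
lines( xg, dgev( xg, loc=zfit$estimate["loc"], scale=zfit$estimate["scale"],
    shape=zfit$estimate["shape"]), col="red", lwd=2)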
# Make a plot of the pdf for the Gumbel.
plot( seq( 0, 5,, 200), dgev( seq( 0, 5,, 200), loc=2, scale=0.75), type="l",
    col="red", cex.axis=2, cex.lab=2, cex.main=2, lwd=2.5, xlab="", ylab="pdf",
    main="Gumbel")

# Probability of exceeding increasingly high values.
pnorm( c(1,2,4,8,16,32), lower.tail=FALSE)            # Normal(0,1)
pgev( c(1,2,4,8,16,32), lower.tail=FALSE)             # Gumbel
pgev( c(1,2,4,8,16,32), shape=0.5, lower.tail=FALSE)  # Frechet
pgev( c(1,2,4,8,16,32), shape=-0.5, lower.tail=FALSE) # Weibull

# Normal vs GEV.
par( mfrow=c(2,1), mar=c(5,4,0.5,0.5))
cdfNorm <- pnorm( 0:20)
cdfGum <- pgev( 0:20)
cdfFrech <- pgev( 0:20, shape=0.5)
cdfWeib <- pgev( 0:20, shape=-0.5)
plot( 0:20, cdfNorm, ylim=c(0,1), type="l", xaxt="n", col="blue", lwd=2,
    xlab="", ylab="F(x)")
lines( 0:20, cdfGum, col="green", lty=2, lwd=2)
lines( 0:20, cdfFrech, col="red", lwd=2)
lines( 0:20, cdfWeib, col="orange", lwd=2)
legend( 10, 0.8, legend=c("Normal", "Gumbel", "Frechet", "Weibull"),
    col=c("blue", "green", "red", "orange"), lty=c(1,2,1,1), bty="n", lwd=2)
pdfNorm <- dnorm( 0:20)
pdfGum <- dgev( 0:20)
pdfFrech <- dgev( 0:20, shape=0.5)
pdfWeib <- dgev( 0:20, shape=-0.5)
plot( 0:20, pdfNorm, ylim=c(0,1), type="l", col="blue", lwd=2, xlab="x", ylab="f(x)")
lines( 0:20, pdfGum, col="green", lty=2, lwd=2)
lines( 0:20, pdfFrech, col="red", lwd=2)
lines( 0:20, pdfWeib, col="orange", lwd=2)

##
## Part 2b: Fitting to a GEV (stationary case).
##

#
# Fort Collins annual maximum precipitation.
#
data("ftcanmax")
plot( ftcanmax$Year, ftcanmax$Prec/100, type="l", col="blue", lwd=2,
    xlab="Year", ylab="Precipitation (in)",
    main="Fort Collins annual maximum daily precipitation",
    col.main="darkblue", cex.main=1.5, col.axis="darkblue",
    col.lab="darkblue", cex.lab=1.5, cex.axis=1.5)
text( ftcanmax$Year[ftcanmax$Year==1997]-0.5,
    ftcanmax$Prec[ftcanmax$Year==1997]/100, labels="1997", col="red", cex=1.5)

fit <- gev.fit( ftcanmax$Prec/100)
gev.diag( fit)

# Likelihood ratio test for xi=0.
fit0 <- gum.fit( ftcanmax$Prec/100)
# gum.diag( fit0)
Dev <- 2*(fit0$nllh - fit$nllh)
pchisq( Dev, 1, lower.tail=FALSE)

# Profile likelihood for the shape parameter.
gev.profxi( fit, xlow=-0.1, xup=0.45)

# Add a vertical line at xi=0. Note that it crosses the lower horizontal blue
# line before the profile likelihood does (consistent with the result of the
# likelihood ratio test above).
abline( v=0, lty=2)

# Interactively find where the profile likelihood crosses the lower blue
# horizontal line.
locator(2) # Now, click on the plot where the lines cross to find the values.

# Profile likelihood for the 100-year return level.
gev.prof( fit, m=100, xlow=3.5, xup=8.5)
locator(2)

# Probability of exceeding the value recorded during the big flood of 28 July 1997.
# Note, however, that the gauge recording these values was away from where the
# flooding occurred (so not really representative of that event). The actual
# value where the flooding occurred was around 4.6 in.
pgev( 1.54, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3], lower.tail=FALSE)
pgev( 4.6, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3], lower.tail=FALSE)
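#
# A possible follow-up (sketch): for annual maxima, an exceedance probability
# p from 'pgev' corresponds to a return period of roughly 1/p years. The
# object name 'p4.6' is illustrative.
#
p4.6 <- pgev( 4.6, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3],
    lower.tail=FALSE)
1/p4.6 # approximate return period (in years) of a 4.6 in annual maximum.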
##
## Part 2c: Threshold Excess Models (stationary case).
##

#
# Hurricane damage data.
#
data("damage")

# Scatter plot by year.
plot( damage$Year, damage$Dam, xlab="Year", ylab="billion US$",
    main="Economic Damage from Hurricanes (1925-1995)", col="darkblue",
    col.lab="darkblue", col.axis="darkblue", col.main="darkblue",
    cex.lab=1.5, cex.axis=1.25, cex.main=1.5, cex=1.25)

# Using a threshold of 6 billion USD, fit to the GPD.
damfit <- gpd.fit( damage$Dam, threshold=6)
gpd.diag( damfit)

# Find 95% CIs for the shape parameter using the profile likelihood method.
gpd.profxi( damfit, xlow=0, xup=2)
locator(2)

# Threshold selection.
gpd.fitrange( damage$Dam, 2, 7)

#
# Phoenix temperature data.
#
data(Tphap)
phx.fit0 <- gpd.fit( -Tphap$MinT, -73)

# De-cluster with runs declustering and r=1.
phx.dc <- dclust( -Tphap$MinT, u=-73, r=1, cluster.by=Tphap$Year)
phxdc.fit0 <- gpd.fit( phx.dc$xdat.dc, -73)

#
# Fort Collins daily precipitation.
#
data("FtCoPrec")

# Point Process: Orthogonal Approach.

# Fit to the GPD with u=0.395.
ftcodgpd.fit <- gpd.fit( FtCoPrec[,"Prec"], 0.395)

# Find the MLE for the Poisson rate parameter (i.e., the mean number of
# exceedances per year).
365.25*sum( FtCoPrec[,"Prec"] > 0.395, na.rm=TRUE)/dim( FtCoPrec)[1]

# Point Process: GEV Re-parameterization Approach.
ftcodpp.fit <- pp.fit( FtCoPrec[,"Prec"], 0.395)

# The implied mean number of exceedances per year,
# [1 + xi*(u - mu)/sigma]^(-1/xi); compare with the orthogonal estimate above.
(1 + ftcodpp.fit$mle[3]*(0.395 - ftcodpp.fit$mle[1])/ftcodpp.fit$mle[2])^(-1/ftcodpp.fit$mle[3])

#
# Risk Communication (under stationarity).
#

# Return levels/periods.

# Without de-clustering.
class( phx.fit0) <- "gpd.fit"
phxfit0.rl <- return.level( phx.fit0, make.plot=FALSE)

# With runs de-clustering (r=1).
class( phxdc.fit0) <- "gpd.fit"
phxdcfit0.rl <- return.level( phxdc.fit0, make.plot=FALSE)

##
## Part 3: Non-Stationarity.
##

#
# Phoenix minimum temperature.
#
data( Tphap)

# First, obtain annual minima from the daily (two months per year) data;
# the negated minima are fit as maxima below.
MinT <- aggregate( Tphap$MinT, by=list( Tphap$Year), min, na.rm=TRUE)$x

# For plotting, make a vector with more readable years.
Yr <- 1900 + unique( Tphap$Year)

# For fitting the model, though, just use t=1,2,... for the years.
yr <- 1:length( Yr)

# Put the above into matrix form so that it will work with 'gev.fit'.
ycov <- matrix( yr, ncol=1)

# Now fit the GEV with a linear trend in both the location and scale parameters.
fit <- gev.fit( -MinT, ydat=ycov, mul=1, sigl=1, siglink=exp)

# Now fit with a linear trend only in the location parameter.
fit1 <- gev.fit( -MinT, ydat=ycov, mul=1)

# Now with a linear trend only in the scale parameter (with log link function).
fit2 <- gev.fit( -MinT, ydat=ycov, sigl=1, siglink=exp)

# Likelihood ratio tests for mu1=0 and sigma1=0, respectively.
pchisq( -2*(fit$nllh - fit2$nllh), 1, lower.tail=FALSE) # Highly significant.
pchisq( -2*(fit$nllh - fit1$nllh), 1, lower.tail=FALSE) # Not significant.

# Probability and Q-Q plots for the model with a linear trend in the
# location parameter only.
gev.diag( fit1)

#
# Other Covariates in Parameters.
#

# Port Jervis winter maxima data.
data(PORTw)
ycov <- matrix( PORTw$AOindex, ncol=1)
fit0 <- gev.fit( PORTw$TMX1)
fit1 <- gev.fit( PORTw$TMX1, ydat=ycov, mul=1)
fit2 <- gev.fit( PORTw$TMX1, ydat=ycov, sigl=1, siglink=exp)
fit <- gev.fit( PORTw$TMX1, ydat=ycov, mul=1, sigl=1, siglink=exp)

# Likelihood ratio test for mu1=0.
pchisq( -2*(fit$nllh - fit2$nllh), 1, lower.tail=FALSE)

# Likelihood ratio test for sigma1=0.
pchisq( -2*(fit$nllh - fit1$nllh), 1, lower.tail=FALSE)
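#
# A possible follow-up (sketch): if the model with a linear AO-index trend in
# the location parameter ('fit1') is retained, the effective 100-year return
# level at AO index value z is mu0 + mu1*z + (sigma/xi)*((-log(1 - 1/100))^(-xi) - 1),
# provided the shape estimate is not too close to zero. The parameter order in
# the 'gev.fit' output is assumed to be (mu0, mu1, sigma, xi); the names 'ao'
# and 'rl100' are illustrative.
#
mu0 <- fit1$mle[1]; mu1 <- fit1$mle[2]
sigma <- fit1$mle[3]; xi <- fit1$mle[4]
ao <- seq( -2, 2,, 50)
rl100 <- mu0 + mu1*ao + (sigma/xi)*((-log(1 - 1/100))^(-xi) - 1)
plot( ao, rl100, type="l", xlab="AO index", ylab="Effective 100-year return level")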
#
# Cyclic variation.
#

# Fort Collins precipitation.
data(FtCoPrec)
prec <- FtCoPrec[,"Prec"]
plot( FtCoPrec[,"month"], prec, xlab="", xaxt="n", ylab="Precipitation (in)")

# Fit an annual cycle in the Poisson rate parameter.

# First, convert the precipitation vector to a binary vector indicating
# excesses over 0.395 inches.
ind <- prec > 0.395

# Set up cyclic trend vectors.
trend1 <- sin( 2*pi*(1:length( prec))/365.25)
trend2 <- cos( 2*pi*(1:length( prec))/365.25)

# Fit the annual cycle in the Poisson rate parameter (orthogonal approach).
lamfit <- glm( ind ~ trend1 + trend2, family=poisson())

# See a better summary of the fit (including significance test results).
summary( lamfit)

# Column bind the trends for use in the 'gpd.fit' and 'pp.fit' functions.
ycov <- cbind( trend1, trend2)

# Orthogonal approach: fit the GPD with an annual cycle in the scale parameter.
fitOrth <- gpd.fit( prec, 0.395, ydat=ycov, sigl=c(1,2), siglink=exp)
fitOrth0 <- gpd.fit( prec, 0.395)
pchisq( -2*(fitOrth$nllh - fitOrth0$nllh), 2, lower.tail=FALSE)

# GEV re-parameterization approach.
fit0 <- pp.fit( prec, 0.395)
fit1 <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov, mul=c(1,2))
fit2 <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov,
    sigl=c(1,2), siglink=exp)
fit <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov,
    mul=c(1,2), sigl=c(1,2), siglink=exp)

# Likelihood ratio test of mu1=mu2=0.
# pchisq( -2*(fit1$nllh - fit0$nllh), 2, lower.tail=FALSE)
pchisq( -2*(fit$nllh - fit2$nllh), 2, lower.tail=FALSE)

# Likelihood ratio test of sigma1=sigma2=0.
pchisq( -2*(fit$nllh - fit1$nllh), 2, lower.tail=FALSE)

# Probability and Q-Q plots.
pp.diag( fit)
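#
# A possible follow-up (sketch): visualize the fitted annual cycle in the
# daily exceedance rate by predicting from 'lamfit' over one year of days.
# The names 'days' and 'cyc' are illustrative.
#
days <- 1:365
cyc <- predict( lamfit, newdata=data.frame( trend1=sin( 2*pi*days/365.25),
    trend2=cos( 2*pi*days/365.25)), type="response")
plot( days, cyc, type="l", xlab="Day of year", ylab="Daily exceedance rate",
    main="Fitted annual cycle in the Poisson rate")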