##
## Part 1: R preliminaries
##

# See the path for the current working directory.
getwd()

# Creating and assigning vectors.
x <- c( -1, 4, 0)

# Creating and assigning matrices.
y <- cbind( c(2, 1, 5), c(3, 7, 9))

# Save the current workspace.
save.image()

# Quit R with and without saving the workspace, and interactively.
# q("yes")
# q("no")
# q()

#
# Subsetting vectors and matrices.
#
x[3]
x[1:2]
x[c(1,3)]
x[-2]
y[1,]
y1 <- y[,1]
y2 <- y[,2]
y[2,1] <- NA

#
# Logicals and missing values.
#
any( is.na( x))
any( is.na( y))
y[ is.na( y)] <- -999.0
x == 0

#
# Contributed Packages.
#

# Install some packages. This need only be done once (per new version of a package).
install.packages( c("fields", "evd", "evdbayes", "ismev", "maps", "SpatialExtremes"))

# Load the packages into R. This must be done in every new R session
# in which the package is to be used.
library( fields)
library( evd)
library( evdbayes)
library( ismev)
library( SpatialExtremes)
# Note the warnings; they could be a problem.

# Find the position of the 'SpatialExtremes' package and detach it for now.
search() # it is in position 2.
detach( pos=2)

# Check that it is no longer loaded.
search()

# See how to reference the 'fields' package in a publication.
citation("fields")

#
# Simulate some random fields.
#
# help( sim.rf)
grid <- list( x=seq( 0, 5,, 100), y=seq( 0, 5,, 100))
obj <- Exp.image.cov( grid=grid, theta=0.5, setup=TRUE)
look <- sim.rf( obj)
look2 <- sim.rf( obj)
set.panel( 2, 1)
image.plot( grid$x, grid$y, look)
title("Simulated Gaussian field")
image.plot( grid$x, grid$y, look2)
title("Another (independent) realization ...")

#
# Plotting in R.
#
X11()
z <- rnorm( 10) # sample of size 10 from a Normal(0,1) distribution.
plot( 1:10, z, type="l", xlab="", ylab="z", main="Std Normal Random Sample")
points( 1:10, z, col="red", pch="s", cex=2)
lines( 1:10, rnorm( 10), col="blue", lwd=2, lty=2)

# Normal Q-Q plot (to check for normality of z).
qqnorm( z)

# Close the plotting device.
dev.off()

##
## Part 2: Extreme Value Analysis
##

#
# Background for EVA.
#
require( evd)
require( extRemes) # will also load 'ismev' automatically.

# Simulations.

# Uniform(0,1) distribution.
U <- runif( 1000)
hist( U)

# Normal(0,1) distribution.
Z <- rnorm( 1000)
hist( Z)

# Gumbel distribution.
# require( evd)
M <- rgev( 1000)
hist( M)

# Simulate 1000 maxima from samples of size 30 from the
# normal distribution.
Zmax <- matrix( NA, 30, 1000)
dim( Zmax)
for( i in 1:1000) Zmax[,i] <- rnorm( 30)
Zmax <- apply( Zmax, 2, max)
hist( Zmax, breaks="FD", col="blue")

# Some properties of the 'Zmax' object.
dim( Zmax)
class( Zmax)
length( Zmax)

# Simulate 1000 maxima from samples of size 30 from the
# Uniform(0,1) distribution.
Umax <- matrix( NA, 30, 1000)
for( i in 1:1000) Umax[,i] <- runif( 30)
Umax <- apply( Umax, 2, max)
hist( Umax, breaks="FD", col="blue")

# Simulate 1000 maxima from samples of size 30 from the
# Frechet distribution.
Fmax <- matrix( NA, 30, 1000)
# require( evd)
for( i in 1:1000) Fmax[,i] <- rgev( 30, loc=2, scale=2.5, shape=0.5)
Fmax <- apply( Fmax, 2, max, na.rm=TRUE)
hist( Fmax, breaks="FD", col="blue")

# Simulate 1000 maxima from samples of size 30 from the
# exponential distribution.
Emax <- matrix( NA, 30, 1000)
for( i in 1:1000) Emax[,i] <- rexp( 30)
Emax <- apply( Emax, 2, max)
hist( Emax, breaks="FD", col="blue")

# Compare 'Emax' with samples from the exponential distribution.
par( mfrow=c(1,2))
hist( Emax, breaks="FD", col="blue")
hist( rexp( 1000), breaks="FD", col="blue")
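#
# A quick sanity check (sketch): the histograms above illustrate the extremal
# types theorem, so a GEV fit to the simulated normal maxima 'Zmax' should
# return a shape estimate near 0 (the Gumbel limit). This uses evd's 'fgev';
# the object names 'zfit' and 'xg' are illustrative, not part of the original
# script.
#
zfit <- fgev( Zmax) # maximum likelihood fit of the GEV to the maxima.
zfit$estimate       # the shape estimate should be close to 0.
hist( Zmax, breaks="FD", col="lightblue", freq=FALSE)
xg <- seq( min( Zmax), max( Zmax), length=200)
lines( xg, dgev( xg, loc=zfit$estimate["loc"], scale=zfit$estimate["scale"],
    shape=zfit$estimate["shape"]), col="red", lwd=2)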
# Make a plot of the pdf for the Gumbel.
plot( seq( 0, 5,, 200), dgev( seq( 0, 5,, 200), loc=2, scale=0.75), type="l",
    col="red", cex.axis=2, cex.lab=2, cex.main=2, lwd=2.5, xlab="", ylab="pdf",
    main="Gumbel")

# Probability of exceeding increasingly high values.
pnorm( c(1,2,4,8,16,32), lower.tail=FALSE)            # Normal(0,1)
pgev( c(1,2,4,8,16,32), lower.tail=FALSE)             # Gumbel
pgev( c(1,2,4,8,16,32), shape=0.5, lower.tail=FALSE)  # Frechet
pgev( c(1,2,4,8,16,32), shape=-0.5, lower.tail=FALSE) # Weibull

# Normal vs GEV.
par( mfrow=c(2,1), mar=c(5,4,0.5,0.5))
cdfNorm <- pnorm( 0:20)
cdfGum <- pgev( 0:20)
cdfFrech <- pgev( 0:20, shape=0.5)
cdfWeib <- pgev( 0:20, shape=-0.5)
plot( 0:20, cdfNorm, ylim=c(0,1), type="l", xaxt="n", col="blue", lwd=2,
    xlab="", ylab="F(x)")
lines( 0:20, cdfGum, col="green", lty=2, lwd=2)
lines( 0:20, cdfFrech, col="red", lwd=2)
lines( 0:20, cdfWeib, col="orange", lwd=2)
legend( 10, 0.8, legend=c("Normal", "Gumbel", "Frechet", "Weibull"),
    col=c("blue", "green", "red", "orange"), lty=c(1,2,1,1), bty="n", lwd=2)
pdfNorm <- dnorm( 0:20)
pdfGum <- dgev( 0:20)
pdfFrech <- dgev( 0:20, shape=0.5)
pdfWeib <- dgev( 0:20, shape=-0.5)
plot( 0:20, pdfNorm, ylim=c(0,1), type="l", col="blue", lwd=2, xlab="x", ylab="f(x)")
lines( 0:20, pdfGum, col="green", lty=2, lwd=2)
lines( 0:20, pdfFrech, col="red", lwd=2)
lines( 0:20, pdfWeib, col="orange", lwd=2)

##
## Part 2b: Fitting to a GEV (stationary case).
##

#
# Fort Collins annual maximum precipitation.
#
data("ftcanmax")
plot( ftcanmax$Year, ftcanmax$Prec/100, type="l", col="blue", lwd=2,
    xlab="Year", ylab="Precipitation (in)",
    main="Fort Collins annual maximum daily precipitation",
    col.main="darkblue", cex.main=1.5, col.axis="darkblue",
    col.lab="darkblue", cex.lab=1.5, cex.axis=1.5)
text( ftcanmax$Year[ftcanmax$Year==1997]-0.5,
    ftcanmax$Prec[ftcanmax$Year==1997]/100, labels="1997", col="red", cex=1.5)

fit <- gev.fit( ftcanmax$Prec/100)
gev.diag( fit)

# Likelihood ratio test for xi=0.
fit0 <- gum.fit( ftcanmax$Prec/100)
# gum.diag( fit0)
Dev <- 2*(fit0$nllh - fit$nllh)
pchisq( Dev, 1, lower.tail=FALSE)

# Profile likelihood for the shape parameter.
gev.profxi( fit, xlow=-0.1, xup=0.45)

# Add a vertical line at xi=0. Note that it crosses the lower horizontal blue
# line before the profile likelihood does (consistent with the result of the
# likelihood ratio test above).
abline( v=0, lty=2)

# Interactively find where the profile likelihood crosses the lower blue
# horizontal line.
locator(2) # Now, click on the plot where the lines cross to find the values.

# Profile likelihood for the 100-year return level.
gev.prof( fit, m=100, xlow=3.5, xup=8.5)
locator(2)

# Probability of exceeding the value recorded during the big flood of 28 July 1997.
# Note, however, that the gauge recording these values was away from where the
# flooding occurred (so not really representative of that event). The actual
# value where the flooding occurred was around 4.6 in.
pgev( 1.54, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3], lower.tail=FALSE)
pgev( 4.6, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3], lower.tail=FALSE)
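#
# A possible follow-up (sketch): for annual maxima, an exceedance probability
# p from 'pgev' corresponds to a return period of roughly 1/p years. The
# object name 'p4.6' is illustrative.
#
p4.6 <- pgev( 4.6, loc=fit$mle[1], scale=fit$mle[2], shape=fit$mle[3],
    lower.tail=FALSE)
1/p4.6 # approximate return period (in years) of a 4.6 in annual maximum.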
##
## Part 2c: Threshold Excess Models (stationary case).
##

#
# Hurricane damage data.
#
data("damage")

# Scatter plot by year.
plot( damage$Year, damage$Dam, xlab="Year", ylab="billion US$",
    main="Economic Damage from Hurricanes (1925-1995)", col="darkblue",
    col.lab="darkblue", col.axis="darkblue", col.main="darkblue",
    cex.lab=1.5, cex.axis=1.25, cex.main=1.5, cex=1.25)

# Using a threshold of 6 billion USD, fit to the GPD.
damfit <- gpd.fit( damage$Dam, threshold=6)
gpd.diag( damfit)

# Find 95% CIs for the shape parameter using the profile likelihood method.
gpd.profxi( damfit, xlow=0, xup=2)
locator(2)

# Threshold selection.
gpd.fitrange( damage$Dam, 2, 7)

#
# Phoenix temperature data.
#
data(Tphap)
phx.fit0 <- gpd.fit( -Tphap$MinT, -73)

# De-cluster with runs declustering and r=1.
phx.dc <- dclust( -Tphap$MinT, u=-73, r=1, cluster.by=Tphap$Year)
phxdc.fit0 <- gpd.fit( phx.dc$xdat.dc, -73)

#
# Fort Collins daily precipitation.
#
data("FtCoPrec")

# Point Process: Orthogonal Approach.

# Fit to the GPD with u=0.395.
ftcodgpd.fit <- gpd.fit( FtCoPrec[,"Prec"], 0.395)

# Find the MLE for the Poisson rate parameter (i.e., the mean number of
# exceedances per year).
365.25*sum( FtCoPrec[,"Prec"] > 0.395, na.rm=TRUE)/dim( FtCoPrec)[1]

# Point Process: GEV Re-parameterization Approach.
ftcodpp.fit <- pp.fit( FtCoPrec[,"Prec"], 0.395)

# The implied mean number of exceedances per year,
# [1 + xi*(u - mu)/sigma]^(-1/xi); compare with the orthogonal estimate above.
(1 + ftcodpp.fit$mle[3]*(0.395 - ftcodpp.fit$mle[1])/ftcodpp.fit$mle[2])^(-1/ftcodpp.fit$mle[3])

#
# Risk Communication (under stationarity).
#

# Return levels/periods.

# Without de-clustering.
class( phx.fit0) <- "gpd.fit"
phxfit0.rl <- return.level( phx.fit0, make.plot=FALSE)

# With runs de-clustering (r=1).
class( phxdc.fit0) <- "gpd.fit"
phxdcfit0.rl <- return.level( phxdc.fit0, make.plot=FALSE)

##
## Part 3: Non-Stationarity.
##

#
# Phoenix minimum temperature.
#
data( Tphap)

# First, obtain annual minima from the daily (two months per year) data;
# the negated minima are fit as maxima below.
MinT <- aggregate( Tphap$MinT, by=list( Tphap$Year), min, na.rm=TRUE)$x

# For plotting, make a vector with more readable years.
Yr <- 1900 + unique( Tphap$Year)

# For fitting the model, though, just use t=1,2,... for the years.
yr <- 1:length( Yr)

# Put the above into matrix form so that it will work with 'gev.fit'.
ycov <- matrix( yr, ncol=1)

# Now fit the GEV with a linear trend in both the location and scale parameters.
fit <- gev.fit( -MinT, ydat=ycov, mul=1, sigl=1, siglink=exp)

# Now fit with a linear trend only in the location parameter.
fit1 <- gev.fit( -MinT, ydat=ycov, mul=1)

# Now with a linear trend only in the scale parameter (with log link function).
fit2 <- gev.fit( -MinT, ydat=ycov, sigl=1, siglink=exp)

# Likelihood ratio tests for mu1=0 and sigma1=0, respectively.
pchisq( -2*(fit$nllh - fit2$nllh), 1, lower.tail=FALSE) # Highly significant.
pchisq( -2*(fit$nllh - fit1$nllh), 1, lower.tail=FALSE) # Not significant.

# Probability and Q-Q plots for the model with a linear trend in the
# location parameter only.
gev.diag( fit1)

#
# Other Covariates in Parameters.
#

# Port Jervis winter maxima data.
data(PORTw)
ycov <- matrix( PORTw$AOindex, ncol=1)
fit0 <- gev.fit( PORTw$TMX1)
fit1 <- gev.fit( PORTw$TMX1, ydat=ycov, mul=1)
fit2 <- gev.fit( PORTw$TMX1, ydat=ycov, sigl=1, siglink=exp)
fit <- gev.fit( PORTw$TMX1, ydat=ycov, mul=1, sigl=1, siglink=exp)

# Likelihood ratio test for mu1=0.
pchisq( -2*(fit$nllh - fit2$nllh), 1, lower.tail=FALSE)

# Likelihood ratio test for sigma1=0.
pchisq( -2*(fit$nllh - fit1$nllh), 1, lower.tail=FALSE)
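#
# A possible follow-up (sketch): if the model with a linear AO-index trend in
# the location parameter ('fit1') is retained, the effective 100-year return
# level at AO index value z is mu0 + mu1*z + (sigma/xi)*((-log(1 - 1/100))^(-xi) - 1),
# provided the shape estimate is not too close to zero. The parameter order in
# the 'gev.fit' output is assumed to be (mu0, mu1, sigma, xi); the names 'ao'
# and 'rl100' are illustrative.
#
mu0 <- fit1$mle[1]; mu1 <- fit1$mle[2]
sigma <- fit1$mle[3]; xi <- fit1$mle[4]
ao <- seq( -2, 2,, 50)
rl100 <- mu0 + mu1*ao + (sigma/xi)*((-log(1 - 1/100))^(-xi) - 1)
plot( ao, rl100, type="l", xlab="AO index", ylab="Effective 100-year return level")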
#
# Cyclic variation.
#

# Fort Collins precipitation.
data(FtCoPrec)
prec <- FtCoPrec[,"Prec"]
plot( FtCoPrec[,"month"], prec, xlab="", xaxt="n", ylab="Precipitation (in)")

# Fit an annual cycle in the Poisson rate parameter.

# First, convert the precipitation vector to a binary vector indicating
# excesses over 0.395 inches.
ind <- prec > 0.395

# Set up cyclic trend vectors.
trend1 <- sin( 2*pi*(1:length( prec))/365.25)
trend2 <- cos( 2*pi*(1:length( prec))/365.25)

# Fit the annual cycle in the Poisson rate parameter (orthogonal approach).
lamfit <- glm( ind ~ trend1 + trend2, family=poisson())

# See a better summary of the fit (including significance test results).
summary( lamfit)

# Column bind the trends for use in the 'gpd.fit' and 'pp.fit' functions.
ycov <- cbind( trend1, trend2)

# Orthogonal approach: fit the GPD with an annual cycle in the scale parameter.
fitOrth <- gpd.fit( prec, 0.395, ydat=ycov, sigl=c(1,2), siglink=exp)
fitOrth0 <- gpd.fit( prec, 0.395)
pchisq( -2*(fitOrth$nllh - fitOrth0$nllh), 2, lower.tail=FALSE)

# GEV re-parameterization approach.
fit0 <- pp.fit( prec, 0.395)
fit1 <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov, mul=c(1,2))
fit2 <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov,
    sigl=c(1,2), siglink=exp)
fit <- pp.fit( xdat=prec, threshold=0.395, npy=365.25, ydat=ycov,
    mul=c(1,2), sigl=c(1,2), siglink=exp)

# Likelihood ratio test of mu1=mu2=0.
# pchisq( -2*(fit1$nllh - fit0$nllh), 2, lower.tail=FALSE)
pchisq( -2*(fit$nllh - fit2$nllh), 2, lower.tail=FALSE)

# Likelihood ratio test of sigma1=sigma2=0.
pchisq( -2*(fit$nllh - fit1$nllh), 2, lower.tail=FALSE)

# Probability and Q-Q plots.
pp.diag( fit)
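#
# A possible follow-up (sketch): visualize the fitted annual cycle in the
# daily exceedance rate by predicting from 'lamfit' over one year of days.
# The names 'days' and 'cyc' are illustrative.
#
days <- 1:365
cyc <- predict( lamfit, newdata=data.frame( trend1=sin( 2*pi*days/365.25),
    trend2=cos( 2*pi*days/365.25)), type="response")
plot( days, cyc, type="l", xlab="Day of year", ylab="Daily exceedance rate",
    main="Fitted annual cycle in the Poisson rate")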