########### R code for Chapter 2 ### Load and plot new versions of Amherst and Mount Airy data # this will upload directly from the website par(cex=1.3) Y1=read.csv('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/amh1.csv') plot(Y1$Year,Y1$Temp,pch=20,xlab='Year',ylab='Temperature', main='Annual Mean Temperature in Amherst, MA') lines(Y1$Year,lm(Temp~Year,data=Y1)$fitted) Y2=read.csv('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/mta1.csv') plot(Y2$MtAiry,Y2$Charl,pch=20,xlab='Mount Airy',ylab='Charleston', main='Summer Mean Temp. in Mount Airy and Charleston') lines(Y2$MtAiry[order(Y2$MtAiry)],lm(Charl~MtAiry,data=Y2)$fitted[order(Y2$MtAiry)]) text(23.8,28.94,'A') text(21.99,25.68,'B') # display in latex-compatible format for(i in 1:31){ print(c(Y1$Year[i],'&',Y1$Temp[i],'&',Y1$Year[i+32],'&',Y1$Temp[i+32], '&',Y1$Year[i+64],'&',Y1$Temp[i+64],'&',Y1$Year[i+95],'&',Y1$Temp[i+95],'\\')) } i=32 print(c(Y1$Year[i],'&',Y1$Temp[i],'&',Y1$Year[i+32],'&',Y1$Temp[i+32],'&&&&\\')) for(i in 1:28){ print(c(Y2$Year[i],'&',Y2$MtAiry[i],'&',Y2$Charl[i],'&',Y2$Year[i+29],'&',Y2$MtAiry[i+29],'&',Y2$Charl[i+29], '&',Y2$Year[i+57],'&',Y2$MtAiry[i+57],'&',Y2$Charl[i+57],'\\')) } print(c(Y2$Year[29],'&',Y2$MtAiry[29],'&&&&&&&\\')) # plot new data postscript('C:/Users/rls/aug20/UNC/STOR664-F23/Book/Figs/Amhnew.eps') par(cex=1.3,pty='s') plot(Y1$Year,Y1$Temp,pch=20,xlab='Year',ylab='Temperature', main='(a) Annual Mean Temperature in Amherst, MA') lines(Y1$Year,lm(Temp~Year,data=Y1)$fitted) dev.off() postscript('C:/Users/rls/aug20/UNC/STOR664-F23/Book/Figs/Mtanew.eps') par(cex=1.3,pty='s') plot(Y2$MtAiry,Y2$Charl,pch=20,xlab='Mount Airy',ylab='Charleston', main='(b) Summer Mean Temp. in Mount Airy and Charleston') lines(Y2$MtAiry[order(Y2$MtAiry)],lm(Charl~MtAiry,data=Y2)$fitted[order(Y2$MtAiry)]) text(23.8,28.94,'A') text(21.99,25.68,'B') dev.off() # raw calculations x=Y1[,1] y=Y1[,2] n=length(x) #x=1:n x=x/100 # or x=Y2[,2] y=Y2[,3] n=length(x) # continue sx=sum(x) sy=sum(y) sxx=sum(x*x) sxy=sum(x*y) syy=sum(y*y) print(c(sx,sy,sxx,sxy,syy)) sx=sx/n sy=sy/n sxx=sxx-n*sx*sx sxy=sxy-n*sx*sy syy=syy-n*sy*sy print(c(sx,sy,sxx,sxy,syy)) sxy/sxx sy-sx*sxy/sxx summary(lm(y~x)) 74.028-(13.284)^2/16.9403 sqrt(63.611/124) D=data.frame(x,y) lm1=lm(y~x,D) D1=data.frame(x=26,y=NA) predict(lm1,D1,interval='confidence') predict(lm1,D1,interval='prediction',level=0.95) predict(lm1,D1,interval='prediction') 26.80447+0.392085*(26-23.42271) 0.758126*qt(0.975,83)*sqrt(1/85+(26-23.42271)^2/39.33528) 27.81499-0.758126*qt(0.975,83)*sqrt(1/85+(26-23.42271)^2/39.33528) 27.81499+0.758126*qt(0.975,83)*sqrt(1/85+(26-23.42271)^2/39.33528) 0.758126*qt(0.975,83)*sqrt(1+1/85+(26-23.42271)^2/39.33528) 27.81499-0.758126*qt(0.975,83)*sqrt(1+1/85+(26-23.42271)^2/39.33528) 27.81499+0.758126*qt(0.975,83)*sqrt(1+1/85+(26-23.42271)^2/39.33528) arima(y,order=c(1,0,0),xreg=x)$coef sqrt(diag(arima(y,order=c(1,0,0),xreg=x)$var.coef)) arima(y,order=c(1,0,0),xreg=x)$coef[3]/sqrt(diag(arima(y,order=c(1,0,0),xreg=x)$var.coef))[3] # checking quadratic fit for amherst x=Y1$Year/100 x1=(x-mean(x)) x2=x1^2-mean(x1^2) sum(x1*x1) sum(x2*x2) sum(x1*x2) Del=sum(x1*x1)*sum(x2*x2)-sum(x1*x2)^2 s0=sum(y) s1=sum(y*x1) s2=sum(y*x2) print(c(s0,s1,s2)) b0=s0/n b1=s1/sum(x1*x1) b2=s2/sum(x2*x2) syy=sum((y-mean(y))^2) s=sqrt((syy-b1*s1-b2*s2)/(n-3)) s/sqrt(n) s/sqrt(sum(x1*x1)) s/sqrt(sum(x2*x2)) T=b2/(s/sqrt(sum(x2*x2))) 2*pt(T,n-2) # predicting to 2040 D=data.frame(x1,x2,y) lm1=lm(y~x1+x2,D) x1s=20.4-mean(x) x2s=x1s^2-mean(x1^2) D1=data.frame(x1=x1s,x2=x2s) predict(lm1,D1,interval='confidence') predict(lm1,D1,interval='prediction') X=cbind(rep(1,126),x1,x2) t(X)%*%X solve(t(X)%*%X) cc=c(1,x1s,x2s) cc %*% solve(t(X)%*%X) %*% cc # The rest of the code uses the old versions of these datasets amh=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/amherst.s',header=T) cha=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/charles.dat',header=F) names(cha)=c('Year','MtAiry','Charleston') # initial code to produce figures in Ch2 par(mfrow=c(1,2),cex=1.3) plot(amh$year,amh$temp,xlab='Year',ylab='Temperature',pch=20) lines(amh$year,lm(temp~year,amh)$fitted,lw=3) plot(cha$MtAiry,cha$Charleston,xlab='Mount Airy',ylab='Charleston',pch=20) lines(cha$MtAiry,lm(Charleston~MtAiry,cha)$fitted,lw=3) x=amh$year y=amh$temp # calculate initial statistics n=length(x) sumx=sum(x) sumy=sum(y) sumxx=sum(x*x) sumxy=sum(x*y) sumyy=sum(y*y) xbar=sumx/n ybar=sumy/n sxx=sumxx-n*xbar*xbar sxy=sumxy-n*xbar*ybar syy=sumyy-n*ybar*ybar b0=ybar b1=sxy/sxx print(c(b0,b1)) # [1] 7.39265432 0.01159718 # # intercept in original parameterization print(b0-b1*xbar) # [1] 6.447484 # agrees with displayed formula near bottom of page 39 of Smith-Young notes # Estimation of sigma ssres=syy-sxy*sxy/sxx s2=ssres/(n-2) s1=sqrt(s2) print(c(ssres,s2,s1)) par(mfrow=c(1,1)) # plot the figure 2.1(a) plot(x,y,xlab='Year',ylab='Mean Temperature',pch=20) yfit=b0+b1*(x-xbar) lines(x,yfit) # compute SEs of parameter estimates se1=sqrt(s2/sxx) se0=sqrt(s2*(1/n+xbar*xbar/sxx)) print(c(se0,se1)) # verify this agrees with results of lm command lm1=lm(y~x) summary(lm1) # standardized residuals sres=lm1$resid/summary(lm1)$sigma # qq-plot qqnorm(sres) abline(a=0,b=1) # Kolmogorov-Smirnov test ks.test(sres,y='pnorm') # Anderson-Darling test library(nortest) ad.test(sres) # Other tests of normality shapiro.test(sres) cvm.test(sres) lillie.test(sres) pearson.test(sres) sf.test(sres) ########## charleston data cha=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/charles.txt',header=F) x=cha[,2] y=cha[,3] # calculate initial statistics n=length(x) sumx=sum(x) sumy=sum(y) sumxx=sum(x*x) sumxy=sum(x*y) sumyy=sum(y*y) xbar=sumx/n ybar=sumy/n sxx=sumxx-n*xbar*xbar sxy=sumxy-n*xbar*ybar syy=sumyy-n*ybar*ybar b0=ybar b1=sxy/sxx print(c(b0,b1)) # [1] 7.39265432 0.01159718 # # intercept in original parameterization print(b0-b1*xbar) # [1] 6.447484 # agrees with displayed formula near bottom of page 39 of Smith-Young notes # Estimation of sigma ssres=syy-sxy*sxy/sxx s2=ssres/(n-2) s1=sqrt(s2) print(c(ssres,s2,s1)) # compute SEs of parameter estimates se1=sqrt(s2/sxx) se0=sqrt(s2*(1/n+xbar*xbar/sxx)) print(c(se0,se1)) # verify this agrees with results of lm command lm1=lm(y~x) summary(lm1)