# example of noncentral chisq c=qchisq(0.95,6) x=0.1*(0:200) y=pchisq(c,6,ncp=x,lower.tail=F) plot(x,y,type='l',xlab='lambda',ylab='power of test', main='Non-Central Chisq, df=6, alpha=0.05') # example of noncentral F c=qf(0.95,3,10) x=0.1*(0:200) y=pf(c,3,10,ncp=x,lower.tail=F) plot(x,y,type='l',xlab='lambda',ylab='power of test', main='Non-Central F, df=(3,10), alpha=0.05') # example of text page 137 # 18*M^2/5 is lam*sig^2 - solve for lam sig=3.4 c=qf(0.95,4,10) M=8 lam=18*M^2/(5*sig^2) pf(c,4,10,ncp=lam,lower.tail=F) M=10 lam=18*M^2/(5*sig^2) pf(c,4,10,ncp=lam,lower.tail=F) # minitab tree data #trees=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/trees.txt',header=T) trees=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/tree.dat',header=T) y=log(trees$vol) x1=log(trees$diam) x2=log(trees$ht) par(mfrow=c(1,2)) plot(x1,y,xlab='Log Diameter',ylab='Log Volume',pch=20) plot(x2,y,xlab='Log Height',ylab='Log Volume',pch=20) lm1=lm(y~x1+x2) # test whether beta_1=2, beta_2=1 y1=y-2*x1-x2 lm2=lm(y1~x1+x2) lm3=lm(y1~1) anova(lm3,lm2) par(mfrow=c(2,2)) x1c=x1-mean(x1) x2c=x2-mean(x2) plot(x1c,lm1$resid,ylab='Residual',xlab='x1c',pch=20) plot(x2c,lm1$resid,ylab='Residual',xlab='x2c',pch=20) plot(x1c*x2c,lm1$resid,ylab='Residual',xlab='x1c*x2c',pch=20) plot(lm1$fitted,lm1$resid,ylab='Residual',xlab='Fitted Value',pch=20) # nuclear power plant data # nukes=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/nukes.txt',header=T) nukes=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/nukes.dat',header=T) nukes$LC=log(nukes$C) nukes$LT1=log(nukes$T1) nukes$LT2=log(nukes$T2) nukes$LS=log(nukes$S) nukes$LN=log(nukes$N) lm4=lm(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes) lm5=lm(LC~D+LS+NE+CT+LN+PT,nukes) anova(lm4,lm5) par(mfrow=c(1,2)) plot(lm5$fitted,lm5$residuals,xlab='Fitted Values',ylab='Residuals',pch=20) plot(nukes$PT,lm5$residuals,xlab='PT',ylab='Residuals',pch=20) # could also do it like this: plot(lm5$fitted,lm5$residuals,xlab='Fitted Values',ylab='Residuals',pch=20) plot(factor(nukes$PT),lm5$residuals,xlab='PT',ylab='Residuals',pch=20) ############ illustrate backward selection nukes=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/nukes.dat',header=T) nukes$LC=log(nukes$C) nukes$LT1=log(nukes$T1) nukes$LT2=log(nukes$T2) nukes$LS=log(nukes$S) nukes$LN=log(nukes$N) lm4=lm(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes) summary(lm4) # drop BW lm5=lm(LC~D+LT1+LT2+LS+PR+NE+CT+LN+PT,nukes) summary(lm5) # drop LT1 lm6=lm(LC~D+LT2+LS+PR+NE+CT+LN+PT,nukes) summary(lm6) # drop LT2 lm7=lm(LC~D+LS+PR+NE+CT+LN+PT,nukes) summary(lm7) # drop PR lm8=lm(LC~D+LS+NE+CT+LN+PT,nukes) summary(lm8) # drop PT lm9=lm(LC~D+LS+NE+CT+LN,nukes) summary(lm9) require(leaps) lm10=regsubsets(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes) lm10s=summary(lm10) lm10s$which AIC=32*log(lm10s$rss/32)+(1:8)*2 plot(AIC~I(1:8),ylab='AIC',xlab='Number of Predictors',pch=20) plot(1:8,lm10s$adjr2,xlab='Number of Parameters',ylab='Adjusted R-Square',pch=20) plot(1:8,lm10s$cp,xlab='Number of Parameters',ylab='Cp',pch=20) abline(0,1) lm11=step(lm4) # look for influential observations sort(influence(lm8)$hat) sort(cooks.distance(lm8)) # try omitting two most influential lm13=regsubsets(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes,subset=c(-19,-26)) lm13s=summary(lm13) lm13s$which AIC=30*log(lm10s$rss/30)+(1:8)*2 plot(AIC~I(1:8),ylab='AIC',xlab='Number of Predictors',pch=20) # best model now contains # D LT2 LS NE CT PT # compared with # D LS NE CT LN PT # for original data # stripchart plot (Faraway p. 158) - do on ORIGINAL scale of data to assess transformations nukesorig=nukes[,2:12] stripchart(data.frame(scale(nukesorig)),method='jitter',las=2,vertical=TRUE) # could also try boxplot(data.frame(scale(nukes[,2:12]))) # not obvious why any of the variables need to be transformed, though some skewness in C, D and N # Examples of multicollinearity # 1: artificial extension of trees data on p. 218 trees=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/trees.txt',header=T) trees$y=log(trees$vol) trees$x1=log(trees$diam) trees$x2=log(trees$ht) trees$x3=0.35*trees$x1-trees$x2-0.15*trees$x1*trees$x2 lm1=lm(y~x1+x2,trees) lm2=lm(y~x1+x2+x3,trees) vif(lm1) vif(lm2) # VIFs for nuclear data vif(lm4) vif(lm8) # illustrating "boxcox" function nukes=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/nukes.txt',header=T) nukes=read.table('D:/STOR664-F21/nukes.txt',header=T) nukes$LC=log(nukes$C) nukes$LT1=log(nukes$T1) nukes$LT2=log(nukes$T2) nukes$LS=log(nukes$S) nukes$LN=log(nukes$N) lm1=lm(C~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes) library(MASS) boxcox(lm1)