# example of noncentral chisq

c=qchisq(0.95,6)
x=0.1*(0:200)
y=pchisq(c,6,ncp=x,lower.tail=F)
plot(x,y,type='l',xlab='lambda',ylab='power of test',
main='Non-Central Chisq, df=6, alpha=0.05')

# example of noncentral F

c=qf(0.95,3,10)
x=0.1*(0:200)
y=pf(c,3,10,ncp=x,lower.tail=F)
plot(x,y,type='l',xlab='lambda',ylab='power of test',
main='Non-Central F, df=(3,10), alpha=0.05')

# example of text page 137
# 18*M^2/5 is lam*sig^2 - solve for lam
sig=3.4
c=qf(0.95,4,10)
M=8
lam=18*M^2/(5*sig^2)
pf(c,4,10,ncp=lam,lower.tail=F)
M=10
lam=18*M^2/(5*sig^2)
pf(c,4,10,ncp=lam,lower.tail=F)


# minitab tree data

#trees=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/trees.txt',header=T)

trees=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/tree.dat',header=T)



y=log(trees$vol)
x1=log(trees$diam)
x2=log(trees$ht)
par(mfrow=c(1,2))
plot(x1,y,xlab='Log Diameter',ylab='Log Volume',pch=20)
plot(x2,y,xlab='Log Height',ylab='Log Volume',pch=20)
lm1=lm(y~x1+x2)

# test whether beta_1=2, beta_2=1
y1=y-2*x1-x2
lm2=lm(y1~x1+x2)
lm3=lm(y1~1)
anova(lm3,lm2)

par(mfrow=c(2,2))
x1c=x1-mean(x1)
x2c=x2-mean(x2)
plot(x1c,lm1$resid,ylab='Residual',xlab='x1c',pch=20)
plot(x2c,lm1$resid,ylab='Residual',xlab='x2c',pch=20)
plot(x1c*x2c,lm1$resid,ylab='Residual',xlab='x1c*x2c',pch=20)
plot(lm1$fitted,lm1$resid,ylab='Residual',xlab='Fitted Value',pch=20)



# nuclear power plant data

# nukes=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/nukes.txt',header=T)

nukes=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/nukes.dat',header=T)
nukes$LC=log(nukes$C)
nukes$LT1=log(nukes$T1)
nukes$LT2=log(nukes$T2)
nukes$LS=log(nukes$S)
nukes$LN=log(nukes$N)
lm4=lm(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes)


lm5=lm(LC~D+LS+NE+CT+LN+PT,nukes)
anova(lm4,lm5)

par(mfrow=c(1,2))
plot(lm5$fitted,lm5$residuals,xlab='Fitted Values',ylab='Residuals',pch=20)
plot(nukes$PT,lm5$residuals,xlab='PT',ylab='Residuals',pch=20)

# could also do it like this:

plot(lm5$fitted,lm5$residuals,xlab='Fitted Values',ylab='Residuals',pch=20)
plot(factor(nukes$PT),lm5$residuals,xlab='PT',ylab='Residuals',pch=20)




############ illustrate backward selection

nukes=read.table('http://rls.sites.oasis.unc.edu/faculty/rs/source/Data/nukes.dat',header=T)
nukes$LC=log(nukes$C)
nukes$LT1=log(nukes$T1)
nukes$LT2=log(nukes$T2)
nukes$LS=log(nukes$S)
nukes$LN=log(nukes$N)
lm4=lm(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes)
summary(lm4)

# drop BW
lm5=lm(LC~D+LT1+LT2+LS+PR+NE+CT+LN+PT,nukes)
summary(lm5)

# drop LT1
lm6=lm(LC~D+LT2+LS+PR+NE+CT+LN+PT,nukes)
summary(lm6)

# drop LT2
lm7=lm(LC~D+LS+PR+NE+CT+LN+PT,nukes)
summary(lm7)

# drop PR
lm8=lm(LC~D+LS+NE+CT+LN+PT,nukes)
summary(lm8)

# drop PT
lm9=lm(LC~D+LS+NE+CT+LN,nukes)
summary(lm9)


require(leaps)
lm10=regsubsets(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes)
lm10s=summary(lm10)

lm10s$which

AIC=32*log(lm10s$rss/32)+(1:8)*2
plot(AIC~I(1:8),ylab='AIC',xlab='Number of Predictors',pch=20)

plot(1:8,lm10s$adjr2,xlab='Number of Parameters',ylab='Adjusted R-Square',pch=20)


plot(1:8,lm10s$cp,xlab='Number of Parameters',ylab='Cp',pch=20)
abline(0,1)

lm11=step(lm4)


# look for influential observations

sort(influence(lm8)$hat)

sort(cooks.distance(lm8))

# try omitting two most influential

lm13=regsubsets(LC~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes,subset=c(-19,-26))
lm13s=summary(lm13)
lm13s$which
AIC=30*log(lm10s$rss/30)+(1:8)*2
plot(AIC~I(1:8),ylab='AIC',xlab='Number of Predictors',pch=20)


# best model now contains
# D LT2 LS NE CT PT
# compared with
# D LS NE CT LN PT
# for original data

# stripchart plot (Faraway p. 158) - do on ORIGINAL scale of data to assess transformations
nukesorig=nukes[,2:12]
stripchart(data.frame(scale(nukesorig)),method='jitter',las=2,vertical=TRUE)

# could also try
boxplot(data.frame(scale(nukes[,2:12])))

# not obvious why any of the variables need to be transformed, though some skewness in C, D and N

# Examples of multicollinearity

# 1: artificial extension of trees data on p. 218
trees=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/trees.txt',header=T)
trees$y=log(trees$vol)
trees$x1=log(trees$diam)
trees$x2=log(trees$ht)
trees$x3=0.35*trees$x1-trees$x2-0.15*trees$x1*trees$x2

lm1=lm(y~x1+x2,trees)
lm2=lm(y~x1+x2+x3,trees)
vif(lm1)
vif(lm2)

# VIFs for nuclear data
vif(lm4)
vif(lm8)



# illustrating "boxcox" function

nukes=read.table('C:/Users/rls/aug20/UNC/STOR664/R-Code/nukes.txt',header=T)


nukes=read.table('D:/STOR664-F21/nukes.txt',header=T)
nukes$LC=log(nukes$C)
nukes$LT1=log(nukes$T1)
nukes$LT2=log(nukes$T2)
nukes$LS=log(nukes$S)
nukes$LN=log(nukes$N)
lm1=lm(C~D+LT1+LT2+LS+PR+NE+CT+BW+LN+PT,nukes)
library(MASS)
boxcox(lm1)