# STOR556 HW1 helper script.
#
# Part 1 reads the county-level "proportion not returned" (PNR) data, plots
# its distribution, and fits an example weighted linear model that excludes
# the two counties with PNR >= 0.1 by giving them weight zero.
# Part 2 continues Faraway's gavote example: basic fit, omission of rows via
# weights, summary diagnostics, a larger model with an interaction, F tests,
# confidence intervals, and the standard diagnostic plots.

# ---- Part 1: reading the data for HW1 ----
# NOTE(review): absolute path to one user's machine; edit before running
# elsewhere.
Y <- read.csv(
  "C:/Users/rsmith/jan16/UNC/STOR556/Data/ProportionNotReturned.csv",
  header = TRUE
)

pnr_title <- "Proportion Absentee Ballots Not Returned - NC Nov 2018"
hist(Y$PNR, main = pnr_title)
plot(density(Y$PNR), main = pnr_title)
rug(Y$PNR)

# Illustration of the slick method to omit Robeson and Bladen counties from
# the analysis: these are the only two counties with PNR > 0.1, so the
# logical vector below is FALSE for exactly those rows.
wts <- Y$PNR < 0.1

# Example of a plausible linear model (not the solution to HW1).
# Note the use of as.numeric() to turn the logical wts into 0/1 weights;
# zero-weight rows do not influence the fit but still receive predictions.
# Using data = Y (rather than Y$... inside the formula) gives the same fit
# and keeps the model usable with predict(..., newdata = ).
lm1 <- lm(PNR ~ Hsgrad + Collgrad, data = Y, weights = as.numeric(wts))

# Now create predictions (with standard errors).
# The predictions for counties 9 and 78 are what you want.
predict(lm1, se.fit = TRUE)

#####################################
# continuation of faraway's example
#####################################

# ---- Part 2: rerun basic set up ----
library(faraway)
data(gavote)
gavote$undercount <- (gavote$ballots - gavote$votes) / gavote$ballots
summary(gavote$undercount)
# Rename the 4th column to "usage".
# NOTE(review): presumably the rural/urban indicator -- confirm with
# names(gavote) before running.
names(gavote)[4] <- "usage"
gavote$pergore <- gavote$gore / gavote$votes

# Fit linear model.
lmod <- lm(undercount ~ pergore + perAA, gavote)
summary(lmod)
# The next command is specific to the faraway package (compact summary).
sumary(lmod)
predict(lmod)
predict(lmod, se.fit = TRUE)

# How to omit some data: weight zero for the first two rows, one elsewhere.
# The length is derived from the data rather than hard-coded.
wts <- c(0, 0, rep(1, nrow(gavote) - 2))
lmodw <- lm(undercount ~ pergore + perAA, gavote, weights = wts)
predict(lmodw, se.fit = TRUE)

# Several of the diagnostics associated with lmod.
deviance(lmod)                                # residual sum of squares
df.residual(lmod)                             # residual degrees of freedom
nrow(gavote) - length(coef(lmod))             # same quantity, computed by hand
sqrt(deviance(lmod) / df.residual(lmod))      # residual standard error
# Store the summary once; the original script used lmodsum without ever
# defining it, which fails with "object 'lmodsum' not found".
lmodsum <- summary(lmod)
lmodsum$r.squared
cor(predict(lmod), gavote$undercount)^2       # equals R-squared
lmodsum$adj.r.squared

# Now fit a more complicated linear model that includes an interaction term.
# The two percentage predictors are centred at their means first.
gavote$cpergore <- gavote$pergore - mean(gavote$pergore)
gavote$cperAA <- gavote$perAA - mean(gavote$perAA)
lmodi <- lm(undercount ~ cperAA + cpergore * usage + equip, gavote)
summary(lmodi)

# Illustrates hypothesis testing through an F test.
# p-value is 0.003 - reject null hypothesis that the simpler model (lmod)
# is correct.
anova(lmod, lmodi)

# Drops variables one at a time using F tests to determine which are
# significant.
drop1(lmodi, test = "F")

confint(lmodi)

# This defines the sequence of four diagnostic plots. The first three should
# be self-explanatory - I'll discuss the fourth next time.
plot(lmodi)