# Exploratory analysis and a first linear model for the Georgia 2000 voting
# data (`gavote`, shipped with the faraway package), followed by a first look
# at a North Carolina absentee-ballot dataset read from a local CSV file.
library(faraway)
data(gavote)
help(gavote)

# Introducing the data ----
head(gavote)
str(gavote)
summary(gavote)

# Per-row undercount: the proportion of ballots not recorded as votes.
gavote$undercount <- (gavote$ballots - gavote$votes) / gavote$ballots
summary(gavote$undercount)

# The mean of the per-row proportions above is not the same as the pooled
# proportion computed from the totals:
with(gavote, sum(ballots - votes) / sum(ballots))

# Graphical and summary statistics ----
hist(gavote$undercount, main = "Undercount", xlab = "Proportion Undercount")
# Same histogram with a larger number of bins.
hist(
  gavote$undercount,
  main = "Undercount",
  xlab = "Proportion Undercount",
  breaks = 20
)
plot(density(gavote$undercount), main = "Undercount")
rug(gavote$undercount)

pie(table(gavote$equip))
barplot(sort(table(gavote$equip), decreasing = TRUE), las = 2)

# Proportion of votes cast for Gore in each row.
gavote$pergore <- gavote$gore / gavote$votes
plot(
  pergore ~ perAA, gavote,
  xlab = "Proportion African Americans",
  ylab = "Proportion for Gore"
)
plot(undercount ~ equip, gavote, xlab = "", las = 3)
xtabs(~ atlanta + rural, gavote)

# Rename the 4th column to "usage".
# NOTE(review): presumably column 4 is `rural` -- confirm with names(gavote).
names(gavote)[4] <- "usage"

# Pairwise correlations between selected columns, chosen by position.
# NOTE(review): positions 11 and 12 are presumably the `undercount` and
# `pergore` columns added above -- confirm against names(gavote)[nix].
nix <- c(3, 10, 11, 12)
cor(gavote[, nix])

# Introducing linear models for this dataset ----
lmod <- lm(undercount ~ pergore + perAA, gavote)
coef(lmod)

# The results could be summarized by
summary(lmod)

# or, alternatively, specific features of the model fit can be extracted with
# the following sequence of commands.
predict(lmod)
residuals(lmod)
predict(lmod, se.fit = TRUE)
deviance(lmod)
df.residual(lmod)
nrow(gavote) - length(coef(lmod))        # residual df computed by hand
sqrt(deviance(lmod) / df.residual(lmod)) # residual standard error by hand
lmodsum <- summary(lmod)
lmodsum$sigma
lmodsum$r.squared
# Squared correlation between fitted values and the response, for comparison
# with the R-squared reported above.
cor(predict(lmod), gavote$undercount)^2
lmodsum$adj.r.squared

# A simplified form of the "summary" command (specific to the faraway
# package) is this:
sumary(lmod)

# First look at the North Carolina dataset ----
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this file exists at exactly this location.
Y <- read.csv(
  "C:/Users/rsmith/jan16/UNC/STOR556/Data/ProportionNotReturned.csv",
  header = TRUE
)
hist(
  Y$PNR,
  main = "Proportion Absentee Ballots Not Returned - NC Nov 2018"
)
plot(
  density(Y$PNR),
  main = "Proportion Absentee Ballots Not Returned - NC Nov 2018"
)
rug(Y$PNR)