library(faraway) y=c(320,14,80,36) particle=gl(2,1,4,labels=c('no','yes')) quality=gl(2,2,labels=c('good','bad')) wafer=data.frame(y,particle,quality) ov=xtabs(y~quality+particle) modl=glm(y~particle+quality,family=poisson) sumary(modl) modl=glm(y~particle*quality,family=poisson) sumary(modl) drop1(modl,test='Chi') (t(model.matrix(modl))%*%y)[,] pp=prop.table(xtabs(y~particle)) qp=prop.table(xtabs(y~quality)) fv=outer(qp,pp)*450 2*sum(ov*log(ov/fv)) sum((fv-ov)^2/fv) prop.test(ov) m=matrix(y,nrow=2) modb=glm(m~1,family=binomial) deviance(modb) fisher.test(ov) # direct computation of odds ratio m[1,1]*m[2,2]/(m[1,2]*m[2,1]) # section 6.2 library(faraway) data(haireye) ct=xtabs(y~hair+eye,haireye) summary(ct) par(mfrow=c(1,2)) dotchart(ct) mosaicplot(ct,color=T,main=NULL,las=1) # Poisson plot modc=glm(y~hair+eye,family=poisson,haireye) summary(modc) # a hypothetical example: generate a similar table # by simulation where rows and columns are genuinely independent coltotals=rep(1,4)%*%matrix(ct,ncol=4) rowtotals=matrix(ct,ncol=4)%*%rep(1,4) A=outer(as.vector(rowtotals),as.vector(coltotals))/sum(ct) for(i in 1:4){for(j in 1:4){A[i,j]=rpois(1,A[i,j])}} y1=as.vector(A) # now repeat above statistics using y1 ct=xtabs(y1~hair+eye,haireye) summary(ct) par(mfrow=c(1,2)) dotchart(ct) mosaicplot(ct,color=T,main=NULL,las=1) modc=glm(y1~hair+eye,family=poisson,haireye) summary(modc) # Section 6.4 matched pairs data(eyegrade) (ct=xtabs(y~right+left,eyegrade)) summary(ct) (symfac=factor(apply(eyegrade[,2:3],1,function(x) paste(sort(x),collapse='-')))) mods=glm(y~symfac,eyegrade,family=poisson) c(deviance(mods),df.residual(mods)) pchisq(deviance(mods),df.residual(mods),lower=F) round(xtabs(residuals(mods)~right+left,eyegrade),3) margin.table(ct,1) margin.table(ct,2) modq=glm(y~right+left+symfac,eyegrade,family=poisson) pchisq(deviance(modq),df.residual(modq),lower=F) anova(mods,modq,test='Chi') modqi=glm(y~right+left,eyegrade,family=poisson, subset=-c(1,6,11,16)) pchisq(deviance(modqi),df.residual(modqi),lower=F) ### section 6.5 library(faraway) data(femsmoke) femsmoke (ct=xtabs(y~smoker+dead,femsmoke)) prop.table(ct,1) summary(ct) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='55-64'))) prop.table(cta,1) # repeat with other agegroups (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='18-24'))) prop.table(cta,1) # repeat with other agegroups (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='25-34'))) prop.table(cta,1) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='35-44'))) prop.table(cta,1) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='45-54'))) prop.table(cta,1) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='65-74'))) prop.table(cta,1) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='75+'))) prop.table(cta,1) (cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='55-64'))) prop.table(cta,1) prop.table(xtabs(y~smoker+age,femsmoke),2) fisher.test(cta) ct3=xtabs(y~smoker+dead+age,femsmoke) apply(ct3,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1])) mantelhaen.test(ct3,exact=T) summary(ct3) modi=glm(y~smoker+dead+age,femsmoke,family=poisson) c(deviance(modi),df.residual(modi),pchisq(deviance(modi),df.residual(modi),lower=F)) (coefsmoke=exp(c(0,coef(modi)[2]))) coefsmoke/sum(coefsmoke) prop.table(xtabs(y~smoker,femsmoke)) modj=glm(y~smoker*dead+age,femsmoke,family=poisson) c(deviance(modj),df.residual(modj),pchisq(deviance(modj),df.residual(modj),lower=F)) modc=glm(y~smoker*age + age*dead,femsmoke,family=poisson) c(deviance(modc),df.residual(modc),pchisq(deviance(modc),df.residual(modc),lower=F)) modu=glm(y~(smoker+age+dead)^2,femsmoke,family=poisson) ctf=xtabs(fitted(modu)~smoker+dead+age,femsmoke) apply(ctf,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1])) anova(modc,modu,test='Chi') exp(coef(modu)['smokerno:deadno']) modsat=glm(y~smoker*age*dead,femsmoke,family=poisson) ctf=xtabs(fitted(modsat)~smoker+dead+age,femsmoke) apply(ctf,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1])) drop1(modsat,test='Chi') drop1(modu,test='Chi') ybin=matrix(femsmoke$y,ncol=2) modbin=glm(ybin~smoker*age,femsmoke[1:14,],family=binomial) drop1(modbin,test='Chi') modbinr=glm(ybin~smoker+age,femsmoke[1:14,],family=binomial) drop1(modbinr,test='Chi') deviance(modu) deviance(modbinr) exp(-coef(modbinr)[2]) modbinull=glm(ybin~1,femsmoke[1:14,],family=binomial) deviance(modbinull) # section 6.6 data(nes96) (tab1=xtabs(~PID+educ,nes96)) (partyed=as.data.frame.table(xtabs(~PID+educ,nes96))) nomod=glm(Freq~PID+educ,partyed,family=poisson) pchisq(deviance(nomod),df.residual(nomod),lower=F) partyed$oPID=unclass(partyed$PID) partyed$oeduc=unclass(partyed$educ) ormod=glm(Freq~PID+educ+I(oPID*oeduc),partyed,family=poisson) anova(nomod,ormod,test='Chi') summary(ormod)$coef['I(oPID * oeduc)',] apid=c(1,2,5,6,7,10,11) aeduc=c(1,1,1,2,2,3,3) ormoda=glm(Freq~PID+educ+I(apid[oPID]*aeduc[oeduc]),partyed,family=poisson) anova(nomod,ormoda,test='Chi') round(xtabs(predict(ormod,type='response')~PID+educ,partyed),2) round(xtabs(predict(ormoda,type='response')~PID+educ,partyed),2) # odds ratio for lower right 2x2 table - result is same as coefficient of I(oPID * oeduc) log(39.28*28.85/(47.49*23.19)) round(xtabs(residuals(ormod,type='response')~PID+educ,partyed),2) cmod=glm(Freq~PID+educ+educ:oPID,partyed,family=poisson) anova(nomod,cmod,test='Chi') summary(cmod)$coef[14:19,] anova(ormod,cmod,test='Chi') aedu=c(1,1,2,2,2,2,2) ormodb=glm(Freq~PID+educ+I(apid[oPID]*aedu[oeduc]),partyed,family=poisson) deviance(ormodb) deviance(ormod)