library(faraway)
y=c(320,14,80,36)
particle=gl(2,1,4,labels=c('no','yes'))
quality=gl(2,2,labels=c('good','bad'))
wafer=data.frame(y,particle,quality)

ov=xtabs(y~quality+particle)

modl=glm(y~particle+quality,family=poisson)
sumary(modl)

modl=glm(y~particle*quality,family=poisson)
sumary(modl)

drop1(modl,test='Chi')

(t(model.matrix(modl))%*%y)[,]

pp=prop.table(xtabs(y~particle))

qp=prop.table(xtabs(y~quality))

fv=outer(qp,pp)*450

2*sum(ov*log(ov/fv))



sum((fv-ov)^2/fv)



prop.test(ov)

m=matrix(y,nrow=2)
modb=glm(m~1,family=binomial)
deviance(modb)

fisher.test(ov)
# direct computation of odds ratio
m[1,1]*m[2,2]/(m[1,2]*m[2,1])

# section 6.2
library(faraway)
data(haireye)
ct=xtabs(y~hair+eye,haireye)
summary(ct)


par(mfrow=c(1,2))
dotchart(ct)
mosaicplot(ct,color=T,main=NULL,las=1)

# Poisson plot
modc=glm(y~hair+eye,family=poisson,haireye)
summary(modc)

# a hypothetical example: generate a similar table 
# by simulation where rows and columns are genuinely independent

coltotals=rep(1,4)%*%matrix(ct,ncol=4)
rowtotals=matrix(ct,ncol=4)%*%rep(1,4)
A=outer(as.vector(rowtotals),as.vector(coltotals))/sum(ct)
for(i in 1:4){for(j in 1:4){A[i,j]=rpois(1,A[i,j])}}
y1=as.vector(A)

# now repeat above statistics using y1

ct=xtabs(y1~hair+eye,haireye)
summary(ct)

par(mfrow=c(1,2))
dotchart(ct)
mosaicplot(ct,color=T,main=NULL,las=1)

modc=glm(y1~hair+eye,family=poisson,haireye)
summary(modc)


# Section 6.4 matched pairs

data(eyegrade)
(ct=xtabs(y~right+left,eyegrade))

summary(ct)

(symfac=factor(apply(eyegrade[,2:3],1,function(x) paste(sort(x),collapse='-'))))


mods=glm(y~symfac,eyegrade,family=poisson)
c(deviance(mods),df.residual(mods))

pchisq(deviance(mods),df.residual(mods),lower=F)


round(xtabs(residuals(mods)~right+left,eyegrade),3)

margin.table(ct,1)
margin.table(ct,2)

modq=glm(y~right+left+symfac,eyegrade,family=poisson)
pchisq(deviance(modq),df.residual(modq),lower=F)

anova(mods,modq,test='Chi')

modqi=glm(y~right+left,eyegrade,family=poisson, subset=-c(1,6,11,16))
pchisq(deviance(modqi),df.residual(modqi),lower=F)

### section 6.5

library(faraway)
data(femsmoke)
femsmoke

(ct=xtabs(y~smoker+dead,femsmoke))

prop.table(ct,1)

summary(ct)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='55-64')))
prop.table(cta,1)

# repeat with other agegroups
(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='18-24')))
prop.table(cta,1)

# repeat with other agegroups
(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='25-34')))
prop.table(cta,1)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='35-44')))
prop.table(cta,1)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='45-54')))
prop.table(cta,1)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='65-74')))
prop.table(cta,1)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='75+')))
prop.table(cta,1)

(cta=xtabs(y~smoker+dead,femsmoke,subset=(age=='55-64')))
prop.table(cta,1)

prop.table(xtabs(y~smoker+age,femsmoke),2)

fisher.test(cta)

ct3=xtabs(y~smoker+dead+age,femsmoke)
apply(ct3,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1]))

mantelhaen.test(ct3,exact=T)

summary(ct3)

modi=glm(y~smoker+dead+age,femsmoke,family=poisson)
c(deviance(modi),df.residual(modi),pchisq(deviance(modi),df.residual(modi),lower=F))

(coefsmoke=exp(c(0,coef(modi)[2])))

coefsmoke/sum(coefsmoke)

prop.table(xtabs(y~smoker,femsmoke))

modj=glm(y~smoker*dead+age,femsmoke,family=poisson)
c(deviance(modj),df.residual(modj),pchisq(deviance(modj),df.residual(modj),lower=F))

modc=glm(y~smoker*age + age*dead,femsmoke,family=poisson)
c(deviance(modc),df.residual(modc),pchisq(deviance(modc),df.residual(modc),lower=F))

modu=glm(y~(smoker+age+dead)^2,femsmoke,family=poisson)

ctf=xtabs(fitted(modu)~smoker+dead+age,femsmoke)
apply(ctf,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1]))

anova(modc,modu,test='Chi')


exp(coef(modu)['smokerno:deadno'])

modsat=glm(y~smoker*age*dead,femsmoke,family=poisson)

ctf=xtabs(fitted(modsat)~smoker+dead+age,femsmoke)
apply(ctf,3,function(x) (x[1,1]*x[2,2])/(x[1,2]*x[2,1]))

drop1(modsat,test='Chi')

drop1(modu,test='Chi')

ybin=matrix(femsmoke$y,ncol=2)
modbin=glm(ybin~smoker*age,femsmoke[1:14,],family=binomial)


drop1(modbin,test='Chi')

modbinr=glm(ybin~smoker+age,femsmoke[1:14,],family=binomial)
drop1(modbinr,test='Chi')


deviance(modu)
deviance(modbinr)

exp(-coef(modbinr)[2])

modbinull=glm(ybin~1,femsmoke[1:14,],family=binomial)
deviance(modbinull)


# section 6.6

data(nes96)
(tab1=xtabs(~PID+educ,nes96))

(partyed=as.data.frame.table(xtabs(~PID+educ,nes96)))

nomod=glm(Freq~PID+educ,partyed,family=poisson)
pchisq(deviance(nomod),df.residual(nomod),lower=F)

partyed$oPID=unclass(partyed$PID)
partyed$oeduc=unclass(partyed$educ)

ormod=glm(Freq~PID+educ+I(oPID*oeduc),partyed,family=poisson)
anova(nomod,ormod,test='Chi')

summary(ormod)$coef['I(oPID * oeduc)',]
apid=c(1,2,5,6,7,10,11)
aeduc=c(1,1,1,2,2,3,3)
ormoda=glm(Freq~PID+educ+I(apid[oPID]*aeduc[oeduc]),partyed,family=poisson)
anova(nomod,ormoda,test='Chi')

round(xtabs(predict(ormod,type='response')~PID+educ,partyed),2)
round(xtabs(predict(ormoda,type='response')~PID+educ,partyed),2)

# odds ratio for lower right 2x2 table - result is same as coefficient of I(oPID * oeduc)
log(39.28*28.85/(47.49*23.19))

round(xtabs(residuals(ormod,type='response')~PID+educ,partyed),2)

cmod=glm(Freq~PID+educ+educ:oPID,partyed,family=poisson)

anova(nomod,cmod,test='Chi')

summary(cmod)$coef[14:19,]

anova(ormod,cmod,test='Chi')

aedu=c(1,1,2,2,2,2,2)
ormodb=glm(Freq~PID+educ+I(apid[oPID]*aedu[oeduc]),partyed,family=poisson)
deviance(ormodb)

deviance(ormod)