# Rcode for Section 13.4: epilepsy data




library(faraway)
data(epilepsy)


epilepsy$period=rep(0:4,59)
epilepsy$drug=factor(c("placebo","treatment")[epilepsy$treat+1])
epilepsy$phase=factor(c("baseline","experiment")[epilepsy$expind+1])
epilepsy[epilepsy$id<2.5,]


# group summaries
library(dplyr)
epilepsy %>% 
  group_by (drug,phase) %>%
  summarise(rate=mean(seizures/timeadj)) %>%
  xtabs(formula=rate~phase+drug)

library(ggplot2)
ggplot(epilepsy,aes(x=period,y=seizures,linetype=drug,group=id))+
geom_line()+xlim(1,4)+scale_y_sqrt(breaks=(0:10)^2)+
theme(legend.position='top',legend.direction='horizontal')

ratesum=epilepsy %>%
  group_by(id,phase,drug) %>%
  summarise(rate=mean(seizures/timeadj))
library(tidyr)
comsum=spread(ratesum,phase,rate)
ggplot(comsum,aes(x=baseline,y=experiment,shape=drug))+geom_point()+
scale_x_sqrt()+scale_y_sqrt()+geom_abline(intercept=0,slope=1)+
theme(legend.position='top',legend.direction='horizontal')

  
# Here is a direct way to produce the right-hand panel in fig. 13.5
rate1=matrix(nrow=59,ncol=3)
for(id in 1:59){
rate1[id,1]=mean(epilepsy$seizures[epilepsy$id==id&epilepsy$expind==0]/
epilepsy$timeadj[epilepsy$id==id&epilepsy$expind==0])
rate1[id,2]=mean(epilepsy$seizures[epilepsy$id==id&epilepsy$expind==1]/
epilepsy$timeadj[epilepsy$id==id&epilepsy$expind==1])
rate1[id,3]=mean(epilepsy$treat[epilepsy$id==id&epilepsy$expind==1])
}
plot(sqrt(rate1[,1]),sqrt(rate1[,2]),xlab='baseline',ylab='experiment',type='n')
points(sqrt(rate1[rate1[,3]==0,1]),sqrt(rate1[rate1[,3]==0,2]),col='blue',pch=20)
points(sqrt(rate1[rate1[,3]==1,1]),sqrt(rate1[rate1[,3]==1,2]),col='red',pch=20)
abline(a=0,b=1)

# remove suspected outlier in subject 49
epilo=filter(epilepsy,id!=49)

# GLM fit - wrong model but do this first

modglm=glm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),family=poisson,epilo)
sumary(modglm)
# shows significant negative interaction but analysis doesn't allow for grouping


# PQL method
library(MASS)
modpql=glmmPQL(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),
random=~1|id,family=poisson,epilo)
summary(modpql)

# Fitting with glmer - Gauss-Hermite quadrature
library(lme4)
modgh=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id),
nAGQ=25,family=poisson,epilo)
summary(modgh)

# show proportional effect of treatment based on interaction term
exp(-0.302)


# prep data for STAN
epilo$id[epilo$id==59]=49
xm=model.matrix(~expind+treat+I(expind*treat),epilo)
epildat=with(epilo,list(Nobs=nrow(epilo),Nsubs=length(unique(id)),
Npreds=ncol(xm),
y=seizures,
subject=id,
x=xm,offset=timeadj))

# compile and run stan

T1=Sys.time()
library(rstan)
# in the next line you'll need to adjust the path name for your own computer
#
# either put the file glmmpois.stan is your R home directory or include
# the full path 
#
rt=stanc('C:/Users/rsmith/jan16/UNC/STOR556/Classes/glmmpois.stan')
sm=stan_model(stanc_ret=rt,verbose=F)
fit=sampling(sm,data=epildat)
T2=Sys.time()
print(T2-T1)


# traceplot of results
traceplot(fit,pars='sigmasubj',inc_warmup=F)

ipars=data.frame(rstan::extract(fit,pars=c('sigmasubj','beta')))
colnames(ipars)=c("subject","intercept","expind","treat","interaction")

# posterior densities of two key quantities
ggplot(ipars,aes(x=subject))+geom_density()

ggplot(ipars,aes(x=interaction))+geom_density()+geom_vline(xintercept=0)

# Bayesian quantiles and p-values
bayespval=function(x){p=mean(x>0);2*min(p,1-p)}
smat=apply(ipars,2,function(x) c(mean(x),quantile(x,c(0.025,0.975)),bayespval(x)))
rownames(smat)=c('mean','LCB','UCB','pvalue')
t(smat)


# GEE approach (section 13.5)
# This is the model shown by Faraway
library(geepack)
modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id,
family=poisson,corstr='ar1',data=epilepsy,subset=(id!=49))
summary(modgeep)

library(geepack)
# The book by Diggle et al. does this
modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id,
family=poisson,corstr='exchangeable',data=epilepsy,subset=(id!=49))
summary(modgeep)

library(geepack)
# Here is a third possible GEE model
modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id,
family=poisson,corstr='unstructured',data=epilepsy,subset=(id!=49))
summary(modgeep)

#some other models using glmer

# square root link - included only to illustrate that non-canonica link is possible
library(lme4)
modgh1=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id),
family=poisson(link='sqrt'),epilo)
summary(modgh1)

# refit modgh with implicit nAGQ=1
library(lme4)
modgh2=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id),
family=poisson,epilo)
summary(modgh2)

# add "period" as a random effect
library(lme4)
modgh3=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+period|id),
family=poisson,epilo)
summary(modgh3)

# add "expind" as a random effect (instead of period) (suggested by Diggle et al. book)
library(lme4)
modgh4=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+expind|id),
family=poisson,epilo)
summary(modgh4)

# both "expind" and "period" as random effects
library(lme4)
modgh5=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+expind+period|id),
family=poisson,epilo)
summary(modgh5)

# some tests of nested models
anova(modgh3,modgh2)
anova(modgh4,modgh2)
anova(modgh5,modgh4)
anova(modgh5,modgh3)

# each of these anova comparisons shows statistical significance - conclude the most
complicated model (modgh5) is the best


#  residual plots
par(mfrow=c(2,2),cex=0.9)
plot(fitted(modgh),residuals(modgh),xlab='Fitted Values',ylab='Residuals',
pch=20,main='modgh')
plot(fitted(modgh5),residuals(modgh4),xlab='Fitted Values',ylab='Residuals',
pch=20,main='modgh5')
qqnorm(residuals(modgh),main='modgh',pch=20)
abline(0,1)
qqnorm(residuals(modgh5),main='modgh5',pch=20)
abline(0,1)

# will probably omit this - calculations of Pearson X2
sum((epilo$seizures-fitted(modgh1))^2/fitted(modgh1))
sum((epilo$seizures-fitted(modgh2))^2/fitted(modgh2))
sum((epilo$seizures-fitted(modgh3))^2/fitted(modgh3))
sum((epilo$seizures-fitted(modgh4))^2/fitted(modgh4))
sum((epilo$seizures-fitted(modgh4))^2/fitted(modgh5))