# Rcode for Section 13.4: epilepsy data library(faraway) data(epilepsy) epilepsy$period=rep(0:4,59) epilepsy$drug=factor(c("placebo","treatment")[epilepsy$treat+1]) epilepsy$phase=factor(c("baseline","experiment")[epilepsy$expind+1]) epilepsy[epilepsy$id<2.5,] # group summaries library(dplyr) epilepsy %>% group_by (drug,phase) %>% summarise(rate=mean(seizures/timeadj)) %>% xtabs(formula=rate~phase+drug) library(ggplot2) ggplot(epilepsy,aes(x=period,y=seizures,linetype=drug,group=id))+ geom_line()+xlim(1,4)+scale_y_sqrt(breaks=(0:10)^2)+ theme(legend.position='top',legend.direction='horizontal') ratesum=epilepsy %>% group_by(id,phase,drug) %>% summarise(rate=mean(seizures/timeadj)) library(tidyr) comsum=spread(ratesum,phase,rate) ggplot(comsum,aes(x=baseline,y=experiment,shape=drug))+geom_point()+ scale_x_sqrt()+scale_y_sqrt()+geom_abline(intercept=0,slope=1)+ theme(legend.position='top',legend.direction='horizontal') # Here is a direct way to produce the right-hand panel in fig. 13.5 rate1=matrix(nrow=59,ncol=3) for(id in 1:59){ rate1[id,1]=mean(epilepsy$seizures[epilepsy$id==id&epilepsy$expind==0]/ epilepsy$timeadj[epilepsy$id==id&epilepsy$expind==0]) rate1[id,2]=mean(epilepsy$seizures[epilepsy$id==id&epilepsy$expind==1]/ epilepsy$timeadj[epilepsy$id==id&epilepsy$expind==1]) rate1[id,3]=mean(epilepsy$treat[epilepsy$id==id&epilepsy$expind==1]) } plot(sqrt(rate1[,1]),sqrt(rate1[,2]),xlab='baseline',ylab='experiment',type='n') points(sqrt(rate1[rate1[,3]==0,1]),sqrt(rate1[rate1[,3]==0,2]),col='blue',pch=20) points(sqrt(rate1[rate1[,3]==1,1]),sqrt(rate1[rate1[,3]==1,2]),col='red',pch=20) abline(a=0,b=1) # remove suspected outlier in subject 49 epilo=filter(epilepsy,id!=49) # GLM fit - wrong model but do this first modglm=glm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),family=poisson,epilo) sumary(modglm) # shows significant negative interaction but analysis doesn't allow for grouping # PQL method library(MASS) modpql=glmmPQL(seizures~offset(log(timeadj))+expind+treat+I(expind*treat), random=~1|id,family=poisson,epilo) summary(modpql) # Fitting with glmer - Gauss-Hermite quadrature library(lme4) modgh=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id), nAGQ=25,family=poisson,epilo) summary(modgh) # show proportional effect of treatment based on interaction term exp(-0.302) # prep data for STAN epilo$id[epilo$id==59]=49 xm=model.matrix(~expind+treat+I(expind*treat),epilo) epildat=with(epilo,list(Nobs=nrow(epilo),Nsubs=length(unique(id)), Npreds=ncol(xm), y=seizures, subject=id, x=xm,offset=timeadj)) # compile and run stan T1=Sys.time() library(rstan) # in the next line you'll need to adjust the path name for your own computer # # either put the file glmmpois.stan is your R home directory or include # the full path # rt=stanc('C:/Users/rsmith/jan16/UNC/STOR556/Classes/glmmpois.stan') sm=stan_model(stanc_ret=rt,verbose=F) fit=sampling(sm,data=epildat) T2=Sys.time() print(T2-T1) # traceplot of results traceplot(fit,pars='sigmasubj',inc_warmup=F) ipars=data.frame(rstan::extract(fit,pars=c('sigmasubj','beta'))) colnames(ipars)=c("subject","intercept","expind","treat","interaction") # posterior densities of two key quantities ggplot(ipars,aes(x=subject))+geom_density() ggplot(ipars,aes(x=interaction))+geom_density()+geom_vline(xintercept=0) # Bayesian quantiles and p-values bayespval=function(x){p=mean(x>0);2*min(p,1-p)} smat=apply(ipars,2,function(x) c(mean(x),quantile(x,c(0.025,0.975)),bayespval(x))) rownames(smat)=c('mean','LCB','UCB','pvalue') t(smat) # GEE approach (section 13.5) # This is the model shown by Faraway library(geepack) modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id, family=poisson,corstr='ar1',data=epilepsy,subset=(id!=49)) summary(modgeep) library(geepack) # The book by Diggle et al. does this modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id, family=poisson,corstr='exchangeable',data=epilepsy,subset=(id!=49)) summary(modgeep) library(geepack) # Here is a third possible GEE model modgeep=geeglm(seizures~offset(log(timeadj))+expind+treat+I(expind*treat),id=id, family=poisson,corstr='unstructured',data=epilepsy,subset=(id!=49)) summary(modgeep) #some other models using glmer # square root link - included only to illustrate that non-canonica link is possible library(lme4) modgh1=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id), family=poisson(link='sqrt'),epilo) summary(modgh1) # refit modgh with implicit nAGQ=1 library(lme4) modgh2=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1|id), family=poisson,epilo) summary(modgh2) # add "period" as a random effect library(lme4) modgh3=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+period|id), family=poisson,epilo) summary(modgh3) # add "expind" as a random effect (instead of period) (suggested by Diggle et al. book) library(lme4) modgh4=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+expind|id), family=poisson,epilo) summary(modgh4) # both "expind" and "period" as random effects library(lme4) modgh5=glmer(seizures~offset(log(timeadj))+expind+treat+I(expind*treat)+(1+expind+period|id), family=poisson,epilo) summary(modgh5) # some tests of nested models anova(modgh3,modgh2) anova(modgh4,modgh2) anova(modgh5,modgh4) anova(modgh5,modgh3) # each of these anova comparisons shows statistical significance - conclude the most complicated model (modgh5) is the best # residual plots par(mfrow=c(2,2),cex=0.9) plot(fitted(modgh),residuals(modgh),xlab='Fitted Values',ylab='Residuals', pch=20,main='modgh') plot(fitted(modgh5),residuals(modgh4),xlab='Fitted Values',ylab='Residuals', pch=20,main='modgh5') qqnorm(residuals(modgh),main='modgh',pch=20) abline(0,1) qqnorm(residuals(modgh5),main='modgh5',pch=20) abline(0,1) # will probably omit this - calculations of Pearson X2 sum((epilo$seizures-fitted(modgh1))^2/fitted(modgh1)) sum((epilo$seizures-fitted(modgh2))^2/fitted(modgh2)) sum((epilo$seizures-fitted(modgh3))^2/fitted(modgh3)) sum((epilo$seizures-fitted(modgh4))^2/fitted(modgh4)) sum((epilo$seizures-fitted(modgh4))^2/fitted(modgh5))