library(faraway) data(pulp) op=options(contrasts=c('contr.sum','contr.poly')) lmod=aov(bright~operator,pulp) summary(lmod) library(ggplot2) ggplot(pulp,aes(x=operator,y=bright))+ geom_point(position=position_jitter(width=0.1,height=0.0)) coef(lmod) options(op) (0.447-0.106)/5 # answer 0.068 = this is MOM of sigma^2-alpha library(lme4) mmod=lmer(bright~1+(1|operator),pulp) summary(mmod) # alternative display of results sumary(mmod) # compute exact MLE (default above was REML) smod=lmer(bright~1+(1|operator),pulp,REML=F) sumary(smod) # estimated SDs are slightly smaller # section 10.2 nullmod=lm(bright~1,pulp) lrtstat=as.numeric(2*(logLik(smod)-logLik(nullmod))) pvalue=pchisq(lrtstat,1,lower=F) data.frame(lrtstat,pvalue) # bootstrap test # one sample y=simulate(nullmod) ggplot(pulp,aes(x=operator,y=y$sim_1))+ geom_point(position=position_jitter(width=0.1,height=0.0)) # 1000 samples lrtstat=numeric(1000) y=simulate(nullmod) set.seed(123) # you don't have to set the seed in advance - this is just for reproducbility for(i in 1:1000){ y=unlist(simulate(nullmod)) bnull=lm(y~1) balt=lmer(y~1+(1|operator),pulp,REML=F) lrtstat[i]=as.numeric(2*(logLik(balt)-logLik(bnull))) } mean(lrtstat<0.00001) mean(lrtstat>2.5684) sqrt(0.019*0.981/1000) library(RLRsim) exactLRT(smod,nullmod) exactRLRT(mmod) VarCorr(mmod) as.data.frame(VarCorr(mmod)) bsd=numeric(1000) for(i in 1:1000){ y=unlist(simulate(mmod)) bmod=refit(mmod,y) bsd[i]=as.data.frame(VarCorr(bmod))$sdcor[1] } quantile(bsd,c(0.025,0.975)) confint(mmod,method='boot') # section 10.3 # HW9: chapter 10, questions 3 and 5 # prediction of random effects # recall earlier model mmod fitted by REML r1=ranef(mmod) # this computes random effects without conditional variances r1=ranef(mmod,condVar=T) # this computes random effects with the conditional variances # it looks as though you have to do something like this to extract the conditional variances: str(r1) # construct a plot including the confidence intervals library(lattice) dotplot(r1) # section 10.4 fixef(mmod)+ranef(mmod)$operator predict(mmod,re.form=~0)[1] predict(mmod,newdata=data.frame(operator='a')) # prediction interval for a new observation with an unknown operator group.sd=as.data.frame(VarCorr(mmod))$sdcor[1] resid.sd=as.data.frame(VarCorr(mmod))$sdcor[2] pv=numeric(1000) for(i in 1:1000){ y=unlist(simulate(mmod)) bmod=refit(mmod,y) pv[i]=predict(bmod,re.form=~0)[1]+rnorm(n=1,sd=group.sd)+rnorm(n=1,sd=resid.sd) } quantile(pv,c(0.025,0.975)) # prediction interval for a new observation with a known operator ('a') group.sd=as.data.frame(VarCorr(mmod))$sdcor[1] resid.sd=as.data.frame(VarCorr(mmod))$sdcor[2] pv=numeric(1000) for(i in 1:1000){ y=unlist(simulate(mmod,use.u=T)) bmod=refit(mmod,y) pv[i]=predict(bmod,new.data=data.frame(operator='a'))+rnorm(n=1,sd=resid.sd) } quantile(pv,c(0.025,0.975)) # section 10.5 data(pulp) library(lme4) mmod=lmer(bright~1+(1|operator),pulp) par(mfrow=c(1,2),cex=1.2) qqnorm(residuals(mmod),main='',pch=20) plot(fitted(mmod),residuals(mmod),xlab='Fitted',ylab='Residuals',pch=20) abline(h=0) # adding colors to the qq-plot q1=qqnorm(residuals(mmod),type='n') points(q1$x[1:5],q1$y[1:5],col='red',pch=20) points(q1$x[6:10],q1$y[6:10],col='green',pch=20) points(q1$x[11:15],q1$y[11:15],col='blue',pch=20) points(q1$x[16:20],q1$y[16:20],col='magenta',pch=20) # section 10.6 library(faraway) data(penicillin) summary(penicillin) penicillin # plot data penicillin$Blend=gl(5,4) ggplot(penicillin,aes(y=yield,x=treat,shape=Blend))+geom_point()+xlab('Treatment') ggplot(penicillin,aes(y=yield,x=Blend,shape=treat))+geom_point()+xlab('Blend') # fixed effects model op=options(contrasts=c('contr.sum','contr.poly')) lmod=aov(yield~blend+treat,penicillin) summary(lmod) coef(lmod) # random effects model - blend is random mmod=lmer(yield~treat+(1|blend),penicillin) sumary(mmod) # if you don't fully understand the model results, don't be afrad to do something like this mean(penicillin$yield[penicillin$treat=='A']) mean(penicillin$yield[penicillin$treat=='B']) mean(penicillin$yield[penicillin$treat=='C']) mean(penicillin$yield[penicillin$treat=='D']) options(op) ranef(mmod)$blend amod=aov(yield~treat+Error(blend),penicillin) summary(amod) # this corrects a mistake in class - the anova command does work as advertised anova(mmod) # Kenward-Roger test library(pbkrtest) amod=lmer(yield~treat+(1|blend),penicillin,REML=F) nmod=lmer(yield~1+(1|blend),penicillin,REML=F) KRmodcomp(amod,nmod) # but note, we get the same thing with REML=T (because the K-R software refits anyway) amod=lmer(yield~treat+(1|blend),penicillin,REML=T) nmod=lmer(yield~1+(1|blend),penicillin,REML=T) KRmodcomp(amod,nmod) #parametric bootstrap amod=lmer(yield~treat+(1|blend),penicillin,REML=F) nmod=lmer(yield~1+(1|blend),penicillin,REML=F) as.numeric(2*(logLik(amod)-logLik(nmod))) 1-pchisq(4.0474,3) lrtstat=numeric(1000) for(i in 1:1000){ ryield=unlist(simulate(nmod)) nmodr=refit(nmod,ryield) amodr=refit(amod,ryield) lrtstat[i]=2*(logLik(amodr)-logLik(nmodr)) } mean(lrtstat<0.001) mean(lrtstat>4.0474) # also do same thing with parametric bootstrap pmod=PBmodcomp(amod,nmod) summary(pmod) # test significance of blends rmod=lmer(yield~treat+(1|blend),penicillin) nlmod=lm(yield~treat,penicillin) as.numeric(2*(logLik(rmod)-logLik(nlmod,REML=T))) # simulation test here lrtstatf=numeric(1000) for(i in 1:1000){ ryield=unlist(simulate(nlmod)) nlmodr=lm(ryield~treat,penicillin) #rmodr=refit(rmod,ryield) rmodr=lmer(ryield~treat+(1|blend),penicillin) lrtstatf[i]=2*(logLik(rmodr)-logLik(nlmodr,REML=T)) } mean(lrtstatf<0.00001) mean(lrtstatf>2.7629) library(RLRsim) exactRLRT(rmod) # Section 10.7 library(faraway) data(irrigation) library(lme4) m1=lmer(yield~irrigation+variety+irrigation:variety+(1|field)+(1|field:variety),irrigation) m2=lmer(yield~irrigation+variety+irrigation:variety+(1|field),irrigation) m3=lmer(yield~irrigation+variety+(1|field),irrigation) m4=lmer(yield~variety+(1|field),irrigation) m5=lmer(yield~irrigation+(1|field),irrigation) m6=lmer(yield~1+(1|field),irrigation) # Kenward-Roger tests library(pbkrtest) KRmodcomp(m2,m3) KRmodcomp(m3,m4) KRmodcomp(m3,m5) KRmodcomp(m4,m6) KRmodcomp(m5,m6) # Model m6 seems as good as any other # Test statistical significance of random field effect in m6 library(RLRsim) exactRLRT(m6) # Confidence intervals confint(m6) # Diagnostics par(mfrow=c(1,2),cex=1,2) plot(fitted(m6),residuals(m6),xlab='Fitted',ylab='Residuals',pch=20) qqnorm(residuals(m6),main='',pch=20) # Section 10.8 data(eggs) library(ggplot2) ggplot(eggs,aes(y=Fat,x=Lab,color=Technician,shape=Sample))+ geom_point(position=position_jitter(width=0.1,height=0.0))+scale_color_grey() m1=lmer(Fat~1+(1|Lab)+(1|Lab:Technician)+(1|Lab:Technician:Sample),data=eggs) m2=lmer(Fat~1+(1|Lab)+(1|Lab:Technician),data=eggs) m3=lmer(Fat~1+(1|Lab:Technician:Sample),data=eggs) library(RLRsim) exactRLRT(m3,m1,m2) # m1: full model under the alternative # m2: null model # m3: a random effects model containing ONLY the term set to 0 under the null # The p-value is not significant # Elminate "Sample", test for Technician effect: m2 is now the alternative hypothesis m4=lmer(Fat~1+(1|Lab),data=eggs) m5=lmer(Fat~1+(1|Lab:Technician),data=eggs) exactRLRT(m5,m2,m4) # p-value about 0.002 # we could also have use parametric bootstrap tests library(pbkrtest) PBmodcomp(m1,m2) PBmodcomp(m2,m4) # Confidence intervals in original model confint(m1) # My comment on this example: I don't see why we can't treat Sample as a fixed effect m6=lmer(Fat~1+Sample+(1|Lab)+(1|Lab:Technician),data=eggs) KRmodcomp(m6,m2) # p-value is 0.076 --- not significant, but not too far off # If Sample really did represent two manufacturers of egg powder, I think this would be # the right test # Section 10.9 library(faraway) data(abrasion) matrix(abrasion$material,4,4) # fixed effects model: m1=aov(wear~material+run+position,abrasion) summary(m1) # random effects model library(lme4) m2=lmer(wear~material+(1|run)+(1|position),abrasion) summary(m2) # KR test for the fixed effect m3=lmer(wear~1+(1|run)+(1|position),abrasion) library(pbkrtest) KRmodcomp(m2,m3) # RLRT tests for the successive random effects library(RLRsim) m4=lmer(wear~material+(1|run),abrasion) m5=lmer(wear~material+(1|position),abrasion) exactRLRT(m4,m2,m5) exactRLRT(m5,m2,m4) # Section 10.10 data(jsp) # drop everything outside year 2 jspr=jsp[jsp$year==2,] library(ggplot2) ggplot(jspr,aes(x=raven,y=math))+xlab('Raven Score')+ylab('Math Score')+ geom_point(position=position_jitter(),alpha=0.3) ggplot(jspr,aes(x=social,y=math))+xlab('Social Class')+ylab('Math Score')+ geom_boxplot() # multiple regression approach glin=lm(math~raven*gender*social,jspr) anova(glin) # remove gender glin1=lm(math~raven*social,jspr) anova(glin1) anova(glin,glin1) # remove interaction glin2=lm(math~raven+social,jspr) summary(glin2) anova(glin1,glin2) # remove social glin3=lm(math~raven,jspr) summary(glin3) anova(glin1,glin3) # interaction is significant but gender isn't # both raven and social class are significant # problem with this analysis: ignores grouping of students into schools and classes # table of school numbers table(jspr$school) # random effects model mmod=lmer(math~raven*social*gender+(1|school)+(1|school:class),data=jspr) summary(mmod) # eliminate gender, use KR test mmodr=lmer(math~raven*social+(1|school)+(1|school:class),data=jspr) library(pbkrtest) KRmodcomp(mmod,mmodr) # gender effect not significant summary(mmodr) # eliminate interaction, use KR test mmodr2=lmer(math~raven+social+(1|school)+(1|school:class),data=jspr) library(pbkrtest) KRmodcomp(mmodr,mmodr2) # interaction is significant # now run a series of models and test by AIC/BIC. Must use MLE not REML for this all3=lmer(math~raven*social*gender+(1|school)+(1|school:class),data=jspr,REML=F) all2=update(all3,.~.-raven:social:gender) notrs=update(all2,.~.-raven:social) notrg=update(all2,.~.-raven:gender) notsg=update(all2,.~.-social:gender) onlyrs=update(all2,.~.-social:gender-raven:gender) all1=update(all2,.~.-social:gender-raven:gender-raven:social) nogen=update(all1,.~.-gender) # comparisons of all these models anova(all3,all2,notrs,notrg,notsg,onlyrs,all1,nogen)[,1:4] # new model with gender eliminated: use centered raven score jspr$craven=jspr$raven-mean(jspr$raven) mmod=lmer(math~craven*social+(1|school)+(1|school:class),jspr) sumary(mmod) # adjusted school effects adjscores=ranef(mmod)$school[[1]] # compare with raw scores for each school rawscores=coef(lm(math~school-1,jspr)) rawscores=rawscores-mean(rawscores) par(mfrow=c(1,1),cex=1.2) plot(rawscores,adjscores,pch=20) sint=c(9,14,29) text(rawscores[sint],adjscores[sint]+0.2,c('9','15','30')) #some schools show marked differences between raw and adjusted scores # test for significance of school or class effects mmodc=lmer(math~craven*social+(1|school:class),jspr) mmods=lmer(math~craven*social+(1|school),jspr) library(RLRsim) exactRLRT(mmodc,mmod,mmods) exactRLRT(mmods,mmod,mmodc) # class effect marginal, school effect is definitely there # compositional effects schraven=lm(raven~school,jspr)$fit mmodc=lmer(math~craven*social+schraven*social+(1|school),jspr) KRmodcomp(mmod,mmodc) # conclusion: mean raven score per school does not have significant extra effect