# plot gamma densities x=seq(0,8,by=0.1) par(mfrow=c(1,3)) plot(x,dgamma(x,0.75),type='l',ylab='',xlab='',ylim=c(0,1.25),xaxs='i',yaxs='i') plot(x,dgamma(x,1),type='l',ylab='',xlab='',ylim=c(0,1.25),xaxs='i',yaxs='i') plot(x,dgamma(x,2),type='l',ylab='',xlab='',ylim=c(0,1.25),xaxs='i',yaxs='i') # Myers-Montgomery example library(faraway) data(wafer) summary(wafer) # first attempt (not in book): fit linear model, stepwise selection for variables, then Box-Cox l0mdl=lm(resist~.^2,wafer) r0mdl=step(l0mdl) sumary(r0mdl) boxcox(r0mdl) # this suggests a Box-Cox lambda very slightly greater than 0 but with a wide # confidence interval # Therefore, we set lambda=0 (log transformation) and follow the book llmdl=lm(log(resist)~.^2,wafer) rlmdl=step(llmdl) sumary(rlmdl) # now do the same using glm with gamma family gmdl=glm(resist~.^2,family=Gamma(link=log),wafer) rgmdl=step(gmdl) sumary(rgmdl) summary(rgmdl)$dispersion sqrt(summary(rgmdl)$dispersion) # 0.06727 - compare with residual SD for log-Guassian model which was 0.06735 # exact MLE library(MASS) gamma.dispersion(rgmdl) # 0.002265746 - book suggests the GLM value is to be preferred but it's not clear why # # Swedish insurance example library(faraway) data(motorins) motori=motorins[motorins$Zone==1,] gl=glm(Payment~offset(log(Insured))+as.numeric(Kilometres)+Make+Bonus, family=Gamma(link=log),motori) sumary(gl) # compare lognormal model llg=glm(log(Payment)~offset(log(Insured))+as.numeric(Kilometres)+Make+Bonus, family=gaussian,motori) sumary(llg) # Note differences - statistical significance of Kilometres; effects for Make8 # plot densities par(mfrow=c(1,2)) x=seq(0,5,by=0.05) plot(x,dgamma(x,1/0.55597),type='l',ylab='',xlab='',yaxs='i',ylim=c(0,1)) plot(x,dlnorm(x,meanlog=-0.30551,sdlog=sqrt(0.61102)), type='l',ylab='',xlab='',yaxs='i',ylim=c(0,1)) # predictions x0=data.frame(Make='1',Kilometres=1,Bonus=1,Insured=100) predict(gl,new=x0,se=T,type='response') predict(llg,new=x0,se=T,type='response') c(exp(10.998),exp(10.998)*0.16145) # Inverse Gaussian library(SuppDists) x=seq(0,8,by=0.1) par(mfrow=c(1,3)) plot(x,dinvGauss(x,1,0.5),type='l',ylab='',xlab='',ylim=c(0,1.5),xaxs='i',yaxs='i') plot(x,dinvGauss(x,1,1),type='l',ylab='',xlab='',ylim=c(0,1.5),xaxs='i',yaxs='i') plot(x,dinvGauss(x,1,5),type='l',ylab='',xlab='',ylim=c(0,1.5),xaxs='i',yaxs='i') par(mfrow=c(1,1)) data(cpd) lmod=lm(actual~projected-1,cpd) summary(lmod) # residual plot plot(lmod$residuals~log(projected),cpd) abline(h=0) # plot data with fitted line plot(actual~projected,cpd) abline(lmod) # inverse Gaussian model: use identity link to be consistent with previous model igmod=glm(actual~projected-1,family=inverse.gaussian(link='identity'),cpd) sumary(igmod) abline(igmod,lty=2) # residual plot for IG model plot(residuals(igmod)~log(fitted(igmod)),ylab='Deviance Residuals', xlab=expression(log(hat(mu)))) abline(h=0) # In this case he first model fitted seems better