# Program to perform goodness of fit tests for regression data
# This example is for the Amherst data set but can be
# modified for other regressions
#
# The first four lines read in the data, define n to be
# the number of observations ("temp" is one of the variables)
# and then perform a regression. You should modify these
# four lines for your own application.
#
amh<-read.table('amherst.s',header=T)
attach(amh)
n<-length(temp)
nreg<-lm(temp~year)
#
# nsim is the number of simulations. I use 1000 here but this
# may be increased for a more accurate result
#
nsim<-1000
#
# The next lines compute the Looney-Gulledge statistic c1, the
# Kolmogorov-Smirnov statistic d1, the Cramer-von Mises
# statistic w1 and the Anderson-Darling statistic a1
#
q1<-qqnorm(nreg$resid,plot=F)
c1<-cor(q1$x,q1$y)
u1<-pnorm(sort(nreg$resid),mean=0,sd=summary(nreg)$sigma)
d1<-max(c((1:n)/n-u1,u1-(0:(n-1))/n))
w1<-sum((u1-((1:n)-0.5)/n)^2)+1/(12*n)
a1<--sum((2*(1:n)-1)*log(u1)+(2*n+1-2*(1:n))*log(1-u1))/n-n
print(c(c1,d1,w1,a1))
count<-rep(0,4)
for(j in 1:nsim){
#
# The next two lines define a new temperature variable temp1 by
# simulation and perform the same regression as above, but with
# temp1 replacing temp. For your application, modify these two
# lines so you are performing the same regression as in the earlier
# use of lm
#
temp1<-rnorm(n)
nreg<-lm(temp1~year)
q2<-qqnorm(nreg$resid,plot=F)
c2<-cor(q2$x,q2$y)
if(c2d1)count[2]<-count[2]+1
if(w2>w1)count[3]<-count[3]+1
if(a2>a1)count[4]<-count[4]+1
print(c(j,c2,d2,w2,a2))
}
#
# The four components of "count" represent the number of times the
# simulation resulted in a more extreme value of the test statistic
# than the one computed from the real data. If these are less than
# 5% of nsim, you may conclude that the null hypothests is rejected
# at the 5% level
#
print(count)