library(faraway)
data(faithful)

par(mfrow=c(1,1),cex=1.1)
plot(waiting~eruptions,pch=20,ylab='Waiting Time',xlab='Length of Eruption',faithful)
lines(faithful$eruptions,lm(waiting~eruptions,faithful)$fitted,lw=3,col='red')


library(splines)
erupspl=ns(faithful$eruptions,8)
lines(lm(waiting[order(eruptions)]~erupspl[order(eruptions),])$fitted~sort(eruptions),
faithful,lw=3,col='blue')

# alternative way of handling ordering
faith1=faithful[order(faithful$eruptions),]
plot(waiting~eruptions,pch=20,ylab='Waiting Time',xlab='Length of Eruption',faith1)
lines(lm(waiting~ns(eruptions,8))$fitted~eruptions,lw=3,col='blue',faith1)

# kernel method
par(mfrow=c(1,3),cex=1.1)
for(bw in c(0.1,0.5,2)){
with(faithful,{
plot(waiting~eruptions,col='green',pch=20)
lines(ksmooth(eruptions,waiting,"normal",bw))
})}

# optimal kernel smoothing using Bowman-Azzalini method

par(mfrow=c(1,1),cex=1.1)
library(sm)
with(faithful,sm.regression(eruptions,waiting,h=h.select(eruptions,waiting)))


# smoothing splines method

# note the following method automatically selects the optimal smoothing by cross-validation

with(faithful,{
plot(waiting~eruptions,pch=20,col='green')
lines(smooth.spline(eruptions,waiting),lty=2,lw=3,col='red')
})

# example where this goes wrong
data(exb)
with(exb,{
plot(y~x,pch=20,col='green')
lines(smooth.spline(x,y),lty=2,lw=3,col='red')
})


# loess method

with(faithful,{
plot(waiting~eruptions,col='green',pch=20)
f=loess(waiting~eruptions,span=0.25)
i=order(eruptions)
lines(f$x[i],f$fitted[i],lw=3,col='blue')
})
# try other values of span and also try omitting span entirely: the default is 0.75


# confidence bands

library(ggplot2)
ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point(alpha=0.25)+geom_smooth(
method='loess',span=0.5)


library(mgcv)
ggplot(faithful,aes(x=eruptions,y=waiting))+geom_point(alpha=0.25)+geom_smooth(
method='gam',formula=y~s(x))


library(mgcv)
data(exa)
ggplot(exa,aes(x=x,y=y))+geom_point(alpha=0.25)+geom_smooth(
method='gam',formula=y~s(x))



# fit linear model to polynomial splines, old faithful data
erupspl=ns(faithful$eruptions,8)
lm1=lm(waiting~erupspl,faithful)
# add ones to erupspl to allow for intercept term
erups2=cbind(rep(1,272),erupspl)
# compute pointwise standard deviations at all values of eruptions
sd_points=sqrt(diag(erups2 %*% vcov(lm1) %*% t(erups2)))
# t statistic for 95% confidence intervals (adjust confidence limit if desired)
ts=qt(0.975,df.residual(lm1))
# redraw plot with confience lines
i=order(faithful$eruptions)
par(mfrow=c(1,1),cex=1.1)
with(faithful,{
plot(waiting~eruptions,pch=20,ylab='Waiting Time',xlab='Length of Eruption')
lines(eruptions[i],lm1$fitted[i],lw=5,col='blue')
lines(eruptions[i],lm1$fitted[i]+ts*sd_points[i],lw=3,col='green')
lines(eruptions[i],lm1$fitted[i]-ts*sd_points[i],lw=3,col='green')
})



# much easier way to do this:

erupspl=ns(faithful$eruptions,8)
lm1=lm(waiting~erupspl,faithful)
pr1=predict(lm1,se.fit=T,interval='confidence')
i=order(faithful$eruptions)
par(mfrow=c(1,1),cex=1.1)
with(faithful,{
plot(waiting~eruptions,pch=20,ylab='Waiting Time',xlab='Length of Eruption')
lines(eruptions[i],pr1$fit[i,1],lw=5,col='blue')
lines(eruptions[i],pr1$fit[i,2],lw=3,col='green')
lines(eruptions[i],pr1$fit[i,3],lw=3,col='green')
})