# source code for rescaled KNN method # This code was used for "Looking Forwards" section and for Figures 7-9. # Make sure R package "FNN" (available from cran) is installed in your system # read data and establish comparison set y1 (full split times for all runners who finished) TIM=read.table('TIM.txt',header=T) FT=as.matrix(TIM[,7:15]) %*% rep(1,9) u1=!is.na(FT) y1=TIM[u1,7:15] library('FNN') # profile of 2:45 and 3:45 marathoners (used to illustrate prediction method) prof=matrix(ncol=9,nrow=2) # 2:45 marathoner nn=which(TIM$BibNum==1381&TIM$Year==2013) prof[1,]=as.numeric(TIM[nn,7:15]) # 3:45 marathoner nn=which(TIM$BibNum==13186&TIM$Year==2013) prof[2,]=as.numeric(TIM[nn,7:15]) # create figure 7 of paper postscript('KNNfig.ps') par(mfrow=c(2,2),cex=1.2) name1=c('25 km Predictor','30 km Predictor','35 km Predictor','40 km Predictor') # 25K KNN for(nind in 5:8){ K=100 indprof=1 knn1=get.knnx(y1[,1:nind],prof[,1:nind], k=K, algorithm=c("VR")) ind1=knn1$nn.index[indprof,] x0=c(5*1:8,42.2) t0=prof[indprof,1:nind] for(j in 2:nind){t0[j]=t0[j]+t0[j-1]} t1=y1[ind1,] for(j in 2:9){t1[,j]=t1[,j-1]+t1[,j]} # recalibrate in pace (min/mile) t0=t0*1.609344/x0[1:nind] for(i in 1:K){t1[i,]=t1[i,]*1.609344/x0} # rescale nearest neighbors to same dropout time for(i in 1:K){t1[i,]=t1[i,]*t0[nind]/t1[i,nind]} t2=matrix(nrow=19,ncol=9) for(i in 1:19){for(j in 1:9){t2[i,j]=quantile(t1[,j],0.05*i)}} if(nind==5)range1=c(quantile(as.matrix(t1[,nind:9]),0.01),quantile(as.matrix(t1[,nind:9]),0.99)) #if(nind==5)range1=c(205,235) #if(nind==6)range1=c(205,230) #if(nind==5)range1=c(7.8,9.0) #if(nind==6)range1=c(7.8,9.0) #plot(x0,rep(0,9),type='n',ylim=range(t1),xlab='Distance Along Course', plot(x0[nind+0:(9-nind)],rep(0,10-nind),type='n',ylim=range1,xlab='Distance Along Course (km)', #ylab='Projected Finish Time (Mins)',main=name1[nind-4]) ylab='Pace (Mins/Mile)',main=name1[nind-4]) for(i in 1:K){lines(x0[nind+0:(9-nind)],t1[i,nind+0:(9-nind)],col='red')} lines(x0[nind+0:(9-nind)],t2[1,nind+0:(9-nind)],col='blue',lw=3) lines(x0[nind+0:(9-nind)],t2[9,nind+0:(9-nind)],col='black',lw=5) lines(x0[nind+0:(9-nind)],t2[19,nind+0:(9-nind)],col='blue',lw=3) lines(x0[nind+0:(9-nind)],rep(t2[9,nind],10-nind),col='green',lw=3) points(x0[nind+0:(9-nind)],t0[nind+0:(9-nind)],cex=1.5,pch=20) } dev.off() # set of results used to draw figures 8 and 9 res1=array(dim=c(9,9,2,5)) # coordinates of res 1 represent dropout point, prediction point, runner and # stats (5%, 50%, 95% points of predictive distribution, linear extrapolation # and actual value for(nind in 5:8){ K=100 knn1=get.knnx(y1[,1:nind],prof[,1:nind], k=K, algorithm=c("VR")) for(ir in 1:2){ ind1=knn1$nn.index[ir,] x0=c(5*1:8,42.2) t0=prof[ir,1:9] for(j in 2:9){t0[j]=t0[j]+t0[j-1]} t1=y1[ind1,] for(j in 2:9){t1[,j]=t1[,j-1]+t1[,j]} # recalibrate in terms of projected finish time t0=t0*1.609344/x0[1:9] for(i in 1:K){t1[i,]=t1[i,]*1.609344/x0} # rescale nearest neighbors to same dropout time for(i in 1:K){t1[i,]=t1[i,]*t0[nind]/t1[i,nind]} t2=matrix(nrow=19,ncol=9) for(i in 1:19){for(j in 1:9){t2[i,j]=quantile(t1[,j],0.05*i)}} for(nind2 in (nind+1):9){ res1[nind,nind2,ir,]=c(t2[c(1,9,19),nind2],t0[nind],t0[nind2])}}} # picture to represent predictive intervals (single panel only) pred1=matrix(nrow=10,ncol=5) for(kk in 1:2){ if(kk==1){titl1='2:45 MARATHONER' titl2='Predfig1.ps' ir2=1} if(kk==2){titl1='3:45 MARATHONER' titl2='Predfig2.ps' ir2=2} pred1[1:4,]=res1[5,6:9,ir2,] pred1[5:7,]=res1[6,7:9,ir2,] pred1[8:9,]=res1[7,8:9,ir2,] pred1[10,]=res1[8,9,ir2,] ymin=min(pred1) ymax=max(pred1) yran=ymax-ymin postscript(titl2) par(cex=1.5) plot(1:10,pred1[,1],xlab=' ',ylab='Pace (Min/Mile)',type='n',ylim=c(ymin-0.2*yran,ymax+0.2*yran), xlim=c(0.5,10.5),main=titl1,xaxt='n') for(i in 1:10){lines(c(i-0.25,i+0.25),c(pred1[i,1],pred1[i,1]))} for(i in 1:10){lines(c(i-0.25,i+0.25),c(pred1[i,2],pred1[i,2]))} for(i in 1:10){lines(c(i-0.25,i+0.25),c(pred1[i,3],pred1[i,3]))} for(i in 1:10){lines(c(i,i),c(pred1[i,1],pred1[i,3]))} for(i in 1:10){points(i,pred1[i,4],pch=20)} for(i in 1:10){points(i,pred1[i,5],pch=22)} lines(c(4.5,4.5),c(ymin-0.1*yran,ymax+0.1*yran)) lines(c(7.5,7.5),c(ymin-0.1*yran,ymax+0.1*yran)) lines(c(9.5,9.5),c(ymin-0.1*yran,ymax+0.1*yran)) text(5,ymax+0.18*yran,'SAMPLING POINT',cex=1.5) text(2.25,ymax+0.08*yran,'25km',cex=1.1) text(6,ymax+0.08*yran,'30km',cex=1.1) text(8.5,ymax+0.08*yran,'35km',cex=1.1) text(10.2,ymax+0.08*yran,'40km',cex=1.1) text(5,ymin-0.18*yran,'PREDICTION POINT',cex=1.5) text(1,ymin-0.08*yran,'30km',cex=1.1) text(2,ymin-0.08*yran,'35km',cex=1.1) text(3,ymin-0.08*yran,'40km',cex=1.1) text(4,ymin-0.08*yran,'FIN',cex=1.1) text(5,ymin-0.08*yran,'35km',cex=1.1) text(6,ymin-0.08*yran,'40km',cex=1.1) text(7,ymin-0.08*yran,'FIN',cex=1.1) text(8,ymin-0.08*yran,'40km',cex=1.1) text(9,ymin-0.08*yran,'FIN',cex=1.1) text(10,ymin-0.08*yran,'FIN',cex=1.1) points(0.5,ymax,pch=22) points(0.5,ymax-0.1*yran,pch=20) text(1.2,ymax,'Actual') text(1.5,ymax-0.1*yran,'ConstPace') dev.off()