KNN code for Boston marathon analysis

Contents

Clear workspace

% Remove all variables from the workspace so the script starts from a clean
% state. clearvars is used instead of "clear all" because the latter also
% flushes compiled functions, breakpoints and globals, which this script does
% not need and which slows down subsequent execution.
clearvars

User inputs

% Neighborhood size: number of nearest neighbors requested from knnsearch for
% every runner; it is also appended to the name of the results file.
k = 200;

Load and prepare data

% Load TIM2 from ../Data through a relative path instead of cd-ing there and
% back. The working directory is never changed, so a failed load cannot leave
% the session stranded in ../Data, and the script no longer depends on its own
% folder being named "Code".
load(fullfile('..','Data','TIM2'))

% Create cumulative times to finish
% KxxFin is the sum of the 5K split times from the xx-km mark to the finish.
% A missing (NaN) split anywhere in a sum makes that cumulative time NaN.
TIM2.K35Fin=TIM2.K3540+TIM2.K40Fin;
TIM2.K30Fin=TIM2.K3035+TIM2.K3540+TIM2.K40Fin;
TIM2.K25Fin=TIM2.K2530+TIM2.K3035+TIM2.K3540+TIM2.K40Fin;
TIM2.K20Fin=TIM2.K2025+TIM2.K2530+TIM2.K3035+TIM2.K3540+TIM2.K40Fin;

% Column that receives the predicted finish times. It starts at zero, so any
% row that is never selected by one of the stages below keeps the value 0.
% size(...,1) states the row count explicitly.
TIM2.FTknn=zeros(size(TIM2,1),1);

ind_all=~isnan(TIM2.K40Fin);% find records with finish times

% Numeric copy of the data, indexed by column number in the stages below.
% NOTE(review): TIM2 is presumably a dataset array (double() converts it to a
% numeric matrix) -- confirm, and confirm which variables sit in columns 7:14.
TIM2_matrix=double(TIM2);

Group runners by their first missing split and fit the corresponding model

% Starting at 20K
% Runners with no 20K-25K split: predict the time from 20K to the finish
% (K20Fin) from columns 7:10 of TIM2_matrix, using records that have a final
% split (ind_all) as the reference set.
% NOTE(review): columns 7:10 are presumably K05, K510, K1015, K1520 -- confirm
% against the column order of TIM2.
ind_new=isnan(TIM2.K2025);
Y=TIM2_matrix(ind_all,7:10);split_Y=TIM2.K20Fin(ind_all);
X=TIM2_matrix(ind_new,7:10);

% Row i of IDX lists the rows of Y that are nearest to X(i,:).
IDX = knnsearch(Y,X,'K',k);

% Count rows with size(X,1). length(X) returns the LARGEST dimension, so with
% fewer runners than feature columns the loop would run past the last row of
% IDX and fail with an index error.
n_new=size(X,1);

% Columns of pred_knn: [neighbor mean, neighbor median, local linear model].
pred_knn=zeros(n_new,3);

for i = 1:n_new
    ind_IDX=IDX(i,:)';
    % Linear regression fitted only on this runner's neighbors.
    mdl= LinearModel.fit(Y(ind_IDX,:),split_Y(ind_IDX));
    pred_LM=predict(mdl,X(i,:));
    pred_knn(i,:)=[mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM] ;
end

% Predicted finish time = observed splits up to 20K + predicted remainder
% (only the linear-model column is used).
TIM2.FTknn(ind_new)=pred_knn(:,3)+TIM2.K05(ind_new)+TIM2.K510(ind_new)...
    +TIM2.K1015(ind_new)+TIM2.K1520(ind_new);

% Starting at 25K
% Runners with a 20K-25K split but no 25K-30K split: predict the time from
% 25K to the finish (K25Fin) from columns 7:11 of TIM2_matrix, using records
% that have a final split (ind_all) as the reference set.
% NOTE(review): columns 7:11 are presumably K05 ... K2025 -- confirm against
% the column order of TIM2.
ind_new=~isnan(TIM2.K2025)&isnan(TIM2.K2530);
Y=TIM2_matrix(ind_all,7:11);split_Y=TIM2.K25Fin(ind_all);
X=TIM2_matrix(ind_new,7:11);

% Row i of IDX lists the rows of Y that are nearest to X(i,:).
IDX = knnsearch(Y,X,'K',k);

% Count rows with size(X,1). length(X) returns the LARGEST dimension, so with
% fewer runners than feature columns the loop would run past the last row of
% IDX and fail with an index error.
n_new=size(X,1);

% Columns of pred_knn: [neighbor mean, neighbor median, local linear model].
pred_knn=zeros(n_new,3);

for i = 1:n_new
    ind_IDX=IDX(i,:)';
    % Linear regression fitted only on this runner's neighbors.
    mdl= LinearModel.fit(Y(ind_IDX,:),split_Y(ind_IDX));
    pred_LM=predict(mdl,X(i,:));
    pred_knn(i,:)=[mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM] ;
end

% Predicted finish time = observed splits up to 25K + predicted remainder
% (only the linear-model column is used).
TIM2.FTknn(ind_new)=pred_knn(:,3)+TIM2.K05(ind_new)+TIM2.K510(ind_new)...
    +TIM2.K1015(ind_new)+TIM2.K1520(ind_new)+TIM2.K2025(ind_new);

% Starting at 30K
% Runners with splits through 25K-30K but no 30K-35K split: predict the time
% from 30K to the finish (K30Fin) from columns 7:12 of TIM2_matrix, using
% records that have a final split (ind_all) as the reference set.
% NOTE(review): columns 7:12 are presumably K05 ... K2530 -- confirm against
% the column order of TIM2.
ind_new=~isnan(TIM2.K2025)&~isnan(TIM2.K2530)&isnan(TIM2.K3035);
Y=TIM2_matrix(ind_all,7:12);split_Y=TIM2.K30Fin(ind_all);
X=TIM2_matrix(ind_new,7:12);

% Row i of IDX lists the rows of Y that are nearest to X(i,:).
IDX = knnsearch(Y,X,'K',k);

% Count rows with size(X,1). length(X) returns the LARGEST dimension, so with
% fewer runners than feature columns the loop would run past the last row of
% IDX and fail with an index error.
n_new=size(X,1);

% Columns of pred_knn: [neighbor mean, neighbor median, local linear model].
pred_knn=zeros(n_new,3);

for i = 1:n_new
    ind_IDX=IDX(i,:)';
    % Linear regression fitted only on this runner's neighbors.
    mdl= LinearModel.fit(Y(ind_IDX,:),split_Y(ind_IDX));
    pred_LM=predict(mdl,X(i,:));
    pred_knn(i,:)=[mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM] ;
end

% Predicted finish time = observed splits up to 30K + predicted remainder
% (only the linear-model column is used).
TIM2.FTknn(ind_new)=pred_knn(:,3)+TIM2.K05(ind_new)+TIM2.K510(ind_new)...
    +TIM2.K1015(ind_new)+TIM2.K1520(ind_new)+TIM2.K2025(ind_new)+TIM2.K2530(ind_new);

% Starting at 35K
% Runners with splits through 30K-35K but no 35K-40K split: predict the time
% from 35K to the finish (K35Fin) from columns 7:13 of TIM2_matrix, using
% records that have a final split (ind_all) as the reference set.
% NOTE(review): columns 7:13 are presumably K05 ... K3035 -- confirm against
% the column order of TIM2.
ind_new=~isnan(TIM2.K2025)&~isnan(TIM2.K2530)&~isnan(TIM2.K3035)&isnan(TIM2.K3540);
Y=TIM2_matrix(ind_all,7:13);split_Y=TIM2.K35Fin(ind_all);
X=TIM2_matrix(ind_new,7:13);

% Row i of IDX lists the rows of Y that are nearest to X(i,:).
IDX = knnsearch(Y,X,'K',k);

% Count rows with size(X,1). length(X) returns the LARGEST dimension, so with
% fewer runners than feature columns the loop would run past the last row of
% IDX and fail with an index error.
n_new=size(X,1);

% Columns of pred_knn: [neighbor mean, neighbor median, local linear model].
pred_knn=zeros(n_new,3);

for i = 1:n_new
    ind_IDX=IDX(i,:)';
    % Linear regression fitted only on this runner's neighbors.
    mdl= LinearModel.fit(Y(ind_IDX,:),split_Y(ind_IDX));
    pred_LM=predict(mdl,X(i,:));
    pred_knn(i,:)=[mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM] ;
end

% Predicted finish time = observed splits up to 35K + predicted remainder
% (only the linear-model column is used).
TIM2.FTknn(ind_new)=pred_knn(:,3)+TIM2.K05(ind_new)+TIM2.K510(ind_new)...
    +TIM2.K1015(ind_new)+TIM2.K1520(ind_new)+TIM2.K2025(ind_new)+...
    TIM2.K2530(ind_new)+TIM2.K3035(ind_new);

% Starting at 40K
% Runners with splits through 35K-40K but no final split: predict the time
% from 40K to the finish (K40Fin) from columns 7:14 of TIM2_matrix, using
% records that have a final split (ind_all) as the reference set.
% NOTE(review): columns 7:14 are presumably K05 ... K3540 -- confirm against
% the column order of TIM2.
ind_new=~isnan(TIM2.K2025)&~isnan(TIM2.K2530)&~isnan(TIM2.K3035)&...
    ~isnan(TIM2.K3540)&isnan(TIM2.K40Fin);
Y=TIM2_matrix(ind_all,7:14);split_Y=TIM2.K40Fin(ind_all);
X=TIM2_matrix(ind_new,7:14);

% Row i of IDX lists the rows of Y that are nearest to X(i,:).
IDX = knnsearch(Y,X,'K',k);

% Count rows with size(X,1). length(X) returns the LARGEST dimension, so with
% fewer runners than feature columns (8 here) the loop would run past the last
% row of IDX and fail with an index error.
n_new=size(X,1);

% Columns of pred_knn: [neighbor mean, neighbor median, local linear model].
pred_knn=zeros(n_new,3);

for i = 1:n_new
    ind_IDX=IDX(i,:)';
    % Linear regression fitted only on this runner's neighbors.
    mdl= LinearModel.fit(Y(ind_IDX,:),split_Y(ind_IDX));
    pred_LM=predict(mdl,X(i,:));
    pred_knn(i,:)=[mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM] ;
end

% Predicted finish time = observed splits up to 40K + predicted remainder
% (only the linear-model column is used).
TIM2.FTknn(ind_new)=pred_knn(:,3)+TIM2.K05(ind_new)+TIM2.K510(ind_new)...
    +TIM2.K1015(ind_new)+TIM2.K1520(ind_new)+TIM2.K2025(ind_new)+...
    TIM2.K2530(ind_new)+TIM2.K3035(ind_new)+TIM2.K3540(ind_new);

% Copy under the name that is written to the results file.
TIM2knn=TIM2;

Save results

% Write TIM2knn to ../Results/TIM2knn<k>.mat through a relative path instead
% of cd-ing there and back. The working directory is never changed, so a
% failed save cannot leave the session stranded in ../Results, and the script
% no longer depends on its own folder being named "Code".
savefilename=['TIM2knn',num2str(k)];
save(fullfile('..','Results',savefilename), 'TIM2knn')

Notes