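%% KNN code for Boston marathon analysis
% Contents: clear workspace, user inputs, load and prepare data, per-stage KNN prediction, save results, notes.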
%% Clear workspace
% Start from an empty workspace so no stale variables leak into the run.
% Plain "clear" removes the workspace variables; "clear all" would
% additionally purge compiled functions, globals and breakpoints, which this
% script does not need and which MATLAB discourages inside scripts.
clear
%% User inputs
% Number of nearest neighbours requested from knnsearch for each runner; the
% same value is appended to the name of the results file.
k=200;
%% Load and prepare data
% Load the variables stored in TIM2.mat from the sibling Data folder. A
% relative path is used instead of cd-ing into ../Data and back, so a failed
% load cannot leave the session stranded in the wrong folder.
load(fullfile('..','Data','TIM2'))

% Cumulative sums of the segment variables from a given checkpoint to the
% finish. A NaN in any summed segment propagates into the result.
TIM2.K35Fin=TIM2.K3540+TIM2.K40Fin;
TIM2.K30Fin=TIM2.K3035+TIM2.K3540+TIM2.K40Fin;
TIM2.K25Fin=TIM2.K2530+TIM2.K3035+TIM2.K3540+TIM2.K40Fin;
TIM2.K20Fin=TIM2.K2025+TIM2.K2530+TIM2.K3035+TIM2.K3540+TIM2.K40Fin;

% Output column for the KNN-based prediction, one entry per row of TIM2.
% size(...,1) states the row count explicitly instead of relying on length.
TIM2.FTknn=zeros(size(TIM2,1),1);

% Rows whose last segment (K40Fin) is present; these rows form the
% reference set that neighbours are drawn from.
ind_all=~isnan(TIM2.K40Fin);

% Numeric copy of the dataset so predictor columns can be addressed by
% position. NOTE(review): later code assumes columns 7:14 hold the segment
% variables K05..K3540 in course order - confirm against the TIM2 layout.
TIM2_matrix=double(TIM2);
%% Group runners by available splits and apply the corresponding model
% Fill TIM2.FTknn for every row whose segment record stops part-way.
%
% Rows are grouped into five stages by the first missing segment (K2025,
% K2530, K3035, K3540 or K40Fin). For each stage:
%   1. the predictors are the columns of TIM2_matrix from column 7 up to the
%      last segment recorded at that stage (7:10, 7:11, ..., 7:14);
%   2. the k nearest reference rows (ind_all) are found with knnsearch;
%   3. a linear model of the remaining-to-finish sum is fitted on those
%      neighbours and evaluated at the row being predicted;
%   4. the prediction is added to the segments the row already has.
%
% Changes relative to the five hand-copied stages this replaces:
%   * the number of query rows is taken with size(X,1). length(X) returns the
%     LARGEST dimension, so a stage with fewer rows than predictor columns
%     made the loop run past the end of IDX;
%   * stages with no matching rows are skipped;
%   * the unused distance output of knnsearch is no longer requested.
%
% NOTE(review): assumes columns 7:14 of TIM2_matrix are K05..K3540 in the
% order listed in segmentVars - confirm against the TIM2 layout.
segmentVars   = {'K05','K510','K1015','K1520','K2025','K2530','K3035','K3540','K40Fin'};
remainingVars = {'K20Fin','K25Fin','K30Fin','K35Fin','K40Fin'};
firstFeatureCol = 7;   % first predictor column in TIM2_matrix
nBaseSegments   = 4;   % segments always used as predictors (K05..K1520)

for stage = 1:numel(remainingVars)
    nKnown      = nBaseSegments + stage - 1;
    featureCols = firstFeatureCol:(firstFeatureCol + nKnown - 1);

    % Rows at this stage: the next segment is missing and every segment from
    % K2025 up to the last known one is present.
    ind_new = isnan(TIM2.(segmentVars{nKnown + 1}));
    for s = (nBaseSegments + 1):nKnown
        ind_new = ind_new & ~isnan(TIM2.(segmentVars{s}));
    end
    if ~any(ind_new)
        continue   % nothing to predict at this stage
    end

    % Reference predictors/targets and the query predictors.
    Y = TIM2_matrix(ind_all, featureCols);
    remaining = TIM2.(remainingVars{stage});
    split_Y = remaining(ind_all);
    X = TIM2_matrix(ind_new, featureCols);
    nQuery = size(X, 1);

    IDX = knnsearch(Y, X, 'K', k);

    % Columns: mean and median of the neighbours' targets, and the local
    % linear-model prediction. Only the third column is used below; the
    % first two are kept for inspection in the workspace.
    pred_knn = zeros(nQuery, 3);
    for i = 1:nQuery
        ind_IDX = IDX(i,:)';
        mdl = LinearModel.fit(Y(ind_IDX,:), split_Y(ind_IDX));
        pred_LM = predict(mdl, X(i,:));
        pred_knn(i,:) = [mean(split_Y(ind_IDX)) median(split_Y(ind_IDX)) pred_LM];
    end

    % Predicted remaining part plus the recorded segments, accumulated in
    % course order (same left-to-right order as the original expressions).
    total = pred_knn(:,3);
    for s = 1:nKnown
        segment = TIM2.(segmentVars{s});
        total = total + segment(ind_new);
    end
    TIM2.FTknn(ind_new) = total;
end

% Copy under the name that is written to the results file.
TIM2knn=TIM2;
%% Save results
% Write TIM2knn to ../Results/TIM2knn<k>.mat. The target is addressed with a
% relative path rather than cd-ing into ../Results and back, so a failed save
% cannot leave the session in the wrong folder.
savefilename=['TIM2knn',num2str(k)];
save(fullfile('..','Results',savefilename), 'TIM2knn')
%% Notes
% - The folder structure assumed by this code is BostonAnalysis/Code, BostonAnalysis/Data, and BostonAnalysis/Results; the script is expected to be run from BostonAnalysis/Code.
% - knnsearch is called with its default options, i.e. Euclidean distance and a kd-tree search algorithm. The equivalent explicit call is [IDX,D] = knnsearch(Y,X,'K',k,'Distance','euclidean','NSMethod','kdtree').