# Hierarchical clustering for astronomy dataset
#
# Reads a two-column table of star measurements from 'astro.txt', draws a
# labelled scatter plot, compares single/average/complete linkage
# dendrograms, highlights one hand-picked group of stars in colour, and
# finally fits a model-based clustering with mclust.

# Mclust() and its plot method live in the mclust package; attach it up front
# so the script fails early (and clearly) when the package is not installed.
library(mclust)

# Initial loading and plot of data ----
# 'astro.txt' is read as a flat stream of numbers and filled row by row into
# two columns (plotted below as log temperature and log intensity).
stars <- matrix(scan("astro.txt"), byrow = TRUE, ncol = 2)

# Row indices double as plotting labels. The count is derived from the data
# instead of being hard-coded to 47.
# NOTE(review): this variable is called `names` like the base function; the
# name is kept in case code outside this section reads it.
names <- seq_len(nrow(stars))

plot(stars, xlab = "log temperature", ylab = "log intensity", type = "n")
# text() is vectorized, so every label is drawn in a single call.
text(stars[, 1], stars[, 2], labels = names, cex = 0.8)

y1 <- dist(stars, method = "euclidean")

# Comparison of three methods of clustering ----
# y2 is overwritten each time; after this section it holds the
# complete-linkage tree.
y2 <- hclust(y1, method = "single")
plot(y2, cex = 0.5)

y2 <- hclust(y1, method = "average")
plot(y2, cex = 0.5)

y2 <- hclust(y1, method = "complete")
plot(y2, cex = 0.5)

# Colour plot to illustrate last clustering ----
# Hand-picked row indices of the stars drawn in green; all others are red.
# NOTE(review): 14 appears twice in this list. The duplicate is harmless for
# the indexing below, but it may be a typo for another star -- confirm
# against the complete-linkage dendrogram.
u1 <- c(7, 14, 17, 27, 26, 16, 14, 18, 19, 15, 22, 21, 29, 35, 47, 23, 31)
ind <- rep(2, nrow(stars))
ind[u1] <- 1

plot(stars, xlab = "log temperature", ylab = "log intensity", type = "n")
# drop = FALSE keeps a one-row selection as a 1 x 2 matrix, so points() still
# reads it as a single (x, y) pair instead of two separate values.
points(stars[ind == 1, , drop = FALSE], col = "green")
points(stars[ind == 2, , drop = FALSE], col = "red")

# Model-based clustering ----
mc1 <- Mclust(stars)
# Explicit print() so the summary also appears when the file is source()d.
print(mc1)
# NOTE(review): the `data` argument was required by older mclust releases;
# confirm it is still accepted by the installed version.
plot(mc1, data = stars)