# R code for computer examples in Richard Smith's multivariate analysis notes # (Chapters 1-4) # For the notes go here: # https://rls.sites.oasis.unc.edu/faculty/rs/s133/mvnotes.pdf # For datasets go here: # https://rls.sites.oasis.unc.edu/faculty/rs/s133/Mult/mult.html # Chapter 2 (note "iris" is part of the R package so no need to download data(iris) par(mfrow=c(2,2)) plot.design(iris) plot(Sepal.Length~Species,iris) plot(Sepal.Width~Species,iris) plot(Petal.Length~Species,iris) plot(Petal.Width~Species,iris) m1=manova(cbind(Sepal.Length,Sepal.Width,Petal.Length,Petal.Width)~Species,iris) summary(m1,test='Pillai') summary(m1,test='Wilks') summary(m1,test='Hotelling-Lawley') summary(m1,test='Roy') # Chapter 3 - PCA # use your own local path for file names. exams=read.table('C:/Users/rsmith/feb08/UNC/s754/dataandprograms/PCA/exams.txt',header=F) dimnames(exams)=list(1:nrow(exams),c('Vectors','Mechanics','Algebra','Analysis','Statistics')) exams.prc=prcomp(exams,scale=F) print(summary(exams.prc)) # Adjust graphical parameters as needed - cex parameter controls size of type # or in some cases whether the type appears at all par(mfrow=c(1,1),cex=0.9) screeplot(exams.prc) screeplot(exams.prc,type='lines') # The following replaces the "plot.loadings" commands in my earlier S-Plus notes par(mfrow=c(3,2),cex=1) barplot(exams.prc$rotation[order(abs(exams.prc$rotation[,1]),decreasing=T),1],main='PC 1') barplot(exams.prc$rotation[order(abs(exams.prc$rotation[,2]),decreasing=T),2],main='PC 2') barplot(exams.prc$rotation[order(abs(exams.prc$rotation[,3]),decreasing=T),3],main='PC 3') barplot(exams.prc$rotation[order(abs(exams.prc$rotation[,4]),decreasing=T),4],main='PC 4') barplot(exams.prc$rotation[order(abs(exams.prc$rotation[,5]),decreasing=T),5],main='PC 5') # print the standard deviations print(exams.prc$sdev) # Prediction plots pr<-predict(exams.prc) par(mfrow=c(2,2),cex=1.2) plot(1:88,pr[,1],xlab='Student',ylab='Component 1',pch=20) plot(1:88,pr[,2],xlab='Student',ylab='Component 2',pch=20) plot(1:88,pr[,3],xlab='Student',ylab='Component 3',pch=20) plot(1:88,pr[,4],xlab='Student',ylab='Component 4',pch=20) # Predictions for new dataset exam2<-matrix(scan(file='C:/Users/rsmith/feb08/UNC/s754/dataandprograms/PCA/exam2.txt'),byrow=T,ncol=5) dimnames(exam2)=list(1:nrow(exam2),c('Vectors','Mechanics','Algebra','Analysis','Statistics')) print(predict(exams.prc,newdata=exam2)) # Biplot par(mfrow=c(1,1),cex=1.1) biplot(exams.prc) # crimes dataset crimes=read.table('C:/Users/rsmith/feb08/UNC/s754/dataandprograms/PCA/crimes.txt',header=F) # Transpose of original dataset, add 3-letter summaries for names of crimes crimes=t(crimes) dimnames(crimes)=list(1:nrow(crimes),c( 'Hcd','Wou','Hom','Het','Bre','Rob','Lar','Fra','Rec', 'Inj','For','Bla','Ass','Mal','Rev','Alc','Ind','Mot')) # PCA - note use of "prcomp" rather than "princomp" though both could be used crime<-matrix(scan(file='C:/Users/rsmith/feb08/UNC/s754/dataandprograms/PCA/crimes.txt'),byrow=T,ncol=14) crime<-t(crime) nr<-length(crime[,1]) dimnames(crime)<-list(1:nr,c("Hcd","Wou","Hom","Het","Brk","Rob","Lar","Fra","Rec","Inj","For","Blc","Ass","Nal","Rev","Alc","Ind","Mot")) crime.prc<-prcomp(crime,scale=T) # two forms of screeplot screeplot(crime.prc,type='barplot') screeplot(crime.prc,type='lines') # to see the numerical values on the scree plot type summary(crime.prc) # plot of loadings par(mfrow=c(2,2),cex=0.7) barplot(crime.prc$rotation[order(abs(crime.prc$rotation[,1]),decreasing=T),1],main='PC 1') barplot(crime.prc$rotation[order(abs(crime.prc$rotation[,2]),decreasing=T),2],main='PC 2') barplot(crime.prc$rotation[order(abs(crime.prc$rotation[,3]),decreasing=T),3],main='PC 3') barplot(crime.prc$rotation[order(abs(crime.prc$rotation[,4]),decreasing=T),4],main='PC 4') # Prediction plot par(mfrow=c(2,2),cex=1.1) plot(1950:1963,predict(crime.prc)[,1],ylab='PC 1',xlab='Year',pch=20) plot(1950:1963,predict(crime.prc)[,2],ylab='PC 2',xlab='Year',pch=20) plot(1950:1963,predict(crime.prc)[,3],ylab='PC 3',xlab='Year',pch=20) plot(1950:1963,predict(crime.prc)[,4],ylab='PC 4',xlab='Year',pch=20) #Biplot par(mfrow=c(1,1),cex=1.1) biplot(crime.prc) # Chapter 4: Factor analysis exams.fa=factanal(exams,factors=1,scores='regression') # alternative would be # exams.fa=factanal(exams,factors=1,scores='Bartlett') plot(1:88,exams.fa$scores,pch=20) # same for two-factor analysis exams.fa=factanal(exams,factors=2,scores='regression') par(mfrow=c(1,2)) plot(1:88,exams.fa$scores[,1],pch=20,xlab='Student',ylab='Score for Factor 1') plot(1:88,exams.fa$scores[,2],pch=20,xlab='Student',ylab='Score for Factor 2') # "biplot" fnction in R doesn't appear to work for factor analysis so omit that # plot of loadings par(mfrow=c(1,2),cex=0.7) barplot(exams.fa$loadings[order(abs(exams.fa$loadings[,1]),decreasing=T),1],main='Factor 1') barplot(exams.fa$loadings[order(abs(exams.fa$loadings[,2]),decreasing=T),2],main='Factor 2')