######################################################### ## PCA, MDS Demo ## ## Author: Han-Ming Wu (hmwu@stat.sinica.edu.tw) ## ## Institute of Statistical Science, Academia Sinica ## ## http://www.sinica.edu.tw/~hmwu/ ## ## 2005/07/27 ## ######################################################### ## Read Data setwd("C:\\Program Files\\R\\rw2001\\WorkingData") library(stats) cell.matrix <- read.table("TradCellCycle103_alpha.txt", header=TRUE) n <- dim(cell.matrix)[1] p <- dim(cell.matrix)[2]-2 cell.data <- cell.matrix[,3:p+2] gene.name <- cell.matrix[,1] gene.phase <- cell.matrix[,2] phase <- unique(gene.phase) phase.name <- c("G1", "S", "S/G2", "G2/M", "M/G1") ## standardized data cell.sdata <- (cell.data-apply(cell.data, 1, mean))/sqrt(apply(cell.data, 1, var)) ## PCA on genes cell.pca <- princomp(cell.sdata, cor=TRUE, scores=TRUE) # 2D plot for first two components pca.dim1 <- cell.pca$scores[,1] pca.dim2 <- cell.pca$scores[,2] plot(pca.dim1, pca.dim2, main="PCA for Cell Cycle Data on Genes", xlab="1st PCA Componnet", ylab="2nd PCA Componnet", col=c(phase), pch=c(phase)) legend(3, 4, phase.name, pch=c(phase), col=c(phase)) # shows a screeplot. plot(cell.pca) biplot(cell.pca) ## loadings plot plot(loadings(cell.pca)[,1], loadings(cell.pca)[,2], xlab="1st PCA", ylab="2nd PCA", main="Loadings Plot", type="n") text(loadings(cell.pca)[,1], loadings(cell.pca)[,2], labels=paste(1:p)) abline(h=0) abline(v=0) # print loadings loadings(cell.pca) summary(cell.pca) ## ## PCA on experiments ## exp.name <- cell.matrix[0,3:20] exp.pca <- princomp(t(cell.data[1:10,]), cor=TRUE, scores=TRUE) # 2D plot for first two componnets plot(exp.pca$scores[,1], exp.pca$scores[,2], main="PCA for Cell Cycle Data on Conditions", xlab="1st PCA Componnet", ylab="2nd PCA Componnet", type="n") text(exp.pca$scores[,1], exp.pca$scores[,2], labels=paste(1:p)) # shows a screeplot. plot(exp.pca) biplot(exp.pca) ## note that blank entries are small but not zero loadings(exp.pca) summary(exp.pca) ## ## MDS ## library(stats) #correlation matrix cell.cor<- cor(t(cell.sdata)) #distance matrix cell.dist<- sqrt(2*(1-cell.cor)) cell.mds<- cmdscale(cell.dist) mds.dim1 <- cell.mds[,1] mds.dim2 <- cell.mds[,2] plot(mds.dim1, mds.dim2, type="n", xlab="MDS-1", ylab="MDS-2", main="MDS for Cell Cycle Data") for(i in 0:4){ text(mds.dim1[number[gene.phase==i]], mds.dim2[number[gene.phase==i]], gene.phase[number[gene.phase==i]] , cex=0.8, col= i+1) } legend(0.8, 1.0, phase.name, pch="01234", col=c(1,2,3,4,5)) ## Note ## symbols pch=c(unique(gene.phase)) ## numbers pch=paste(c(unique(gene.phase))) ## Reference K. R. Gabriel (1971). The biplot graphical display of matrices with application to principal component analysis. Biometrika 58, 453-467.