PCA plot from read count matrix from RNA-Seq

NB – this is now a Bioconductor R package: github.com/kevinblighe/PCAtools

————————-

You should normalise your data prior to performing PCA. In the code below, you’ll have to add plot legends yourself, and also colour vectors (passed to the ‘col‘ parameter).

Then, assuming that you have transcripts as rows and samples as columns:

NB – in this code, the plots I’ve shown don’t necessarily match the exact code, but the plot type is the same

[Edit: also take a look at my definition of PCA here: PCA in a RNA seq analysis]

project.pca <- prcomp(t(MyReadCountMatrix))
summary(project.pca)

#Determine the proportion of variance of each component
#Proportion of variance equals (PC stdev^2) / (sum all PCs stdev^2)
project.pca.proportionvariances <- ((project.pca$sdev^2) / (sum(project.pca$sdev^2)))*100
barplot(project.pca.proportionvariances, cex.names=1, xlab=paste("Principal component (PC), 1-", length(project.pca$sdev)), ylab="Proportion of variation (%)", main="Scree plot", ylim=c(0,100))

scree

par(cex=1.0, cex.axis=0.8, cex.main=0.8)
pairs(project.pca$x[,1:5], col="black", main="Principal components analysis bi-plotnPCs 1-5", pch=16)
pairs(project.pca$x[,6:10], col="black", main="Principal components analysis bi-plotnPCs 6-10", pch=16)

pairs

par(mar=c(4,4,4,4), mfrow=c(1,3), cex=1.0, cex.main=0.8, cex.axis=0.8)

#Plots scatter plot for PC 1 and 2
plot(project.pca$x, type="n", main="Principal components analysis bi-plot", xlab=paste("PC1, ", round(project.pca.proportionvariances[1], 2), "%"), ylab=paste("PC2, ", round(project.pca.proportionvariances[2], 2), "%"))
points(project.pca$x, col="black", pch=16, cex=1)

#Plots scatter plot for PC 1 and 3
plot(project.pca$x[,1], project.pca$x[,3], type="n", main="Principal components analysis bi-plot", xlab=paste("PC1, ", round(project.pca.proportionvariances[1], 2), "%"), ylab=paste("PC3, ", round(project.pca.proportionvariances[3], 2), "%"))
points(project.pca$x[,1], project.pca$x[,3], col="black", pch=16, cex=1)

#Plots scatter plot for PC 2 and 3
plot(project.pca$x[,2], project.pca$x[,3], type="n", main="Principal components analysis bi-plot", xlab=paste("PC2, ", round(project.pca.proportionvariances[2], 2), "%"), ylab=paste("PC3, ", round(project.pca.proportionvariances[3], 2), "%"))
points(project.pca$x[,2], project.pca$x[,3], col="black", pch=16, cex=1)

biplots

require(scatterplot3d)
par(mar=c(4,4,4,4), cex=1.0, cex.main=0.8, cex.axis=0.8)

scatterplot3d(project.pca$x[,1:3], angle=-40, main="", color="black", pch=17, xlab=paste("PC1, ", round(project.pca.proportionvariances[1], 2), "%"), ylab=paste("PC2, ", round(project.pca.proportionvariances[2], 2), "%"), zlab=paste("PC3, ", round(project.pca.proportionvariances[3], 2), "%"), grid=FALSE, box=FALSE)
source('http://www.sthda.com/sthda/RDoc/functions/addgrids3d.r')
addgrids3d(project.pca$x[,1:3], grid = c("xy", "xz", "yz"))

source('http://www.sthda.com/sthda/RDoc/functions/addgrids3d.r')
addgrids3d(project.pca$x[,1:3], grid = c("xy", "xz", "yz"))

1kg

Read more here: Source link