How to transform the deg gene list from seurat to a gene list input to clusterProfiler compareCluster ?

Sorry for lateness,

I wanted to do something similar. This is what I did for reference:

Using a Seurat generated gene list for input into ClusterProfiler to see the GO or KEGG terms per cluster.

I’ll keep the meat and potatoes of the Seurat vignette in this tutorial:

library(dplyr)
library(Seurat)
library(patchwork)

# Load the PBMC dataset
SeuratData::InstallData("pbmc3k")
pbmc <- SeuratData::LoadData("pbmc3k")
# Initialize the Seurat object with the raw (non-normalized data).
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
#Without graph.name argument
pbmc <- FindNeighbors(pbmc, graph.name ="test", dims = 1:10)
pbmc <- FindClusters(pbmc, graph.name = "test", algorithm = 1, resolution = 0.5)
#pbmc <- FindNeighbors(pbmc, dims = 1:10)
#pbmc <- FindClusters(pbmc, algorithm = 1, resolution = 0.5)
pbmc <- RunUMAP(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "umap", label = TRUE, label.size = 6)

enter image description here

pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.25)
pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC)

##################################################################
#Subsetting top 100 markers with adjusted p values lower than .05#
##################################################################
top100 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 100, wt = avg_log2FC)
top100pval <- subset(top100, rowSums(top100[5] < 0.05) > 0)

Now for that you have your dataframe with top ~100 genes per cluster with a pvalue adjusted of lower than .05. We can bring this to ClusterProfiler.

#ClusterProfiler
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")


BiocManager::install("clusterProfiler")
BiocManager::install("org.Hs.eg.db")
BiocManager::install("AnnotationHub")
library("clusterProfiler")
library("org.Hs.eg.db")
library("AnnotationHub")

df <- top100pval[,7:6]
dfsample <- split(df$gene,df$cluster)
length(dfsample)

#The output of length(dfsample) returns how many clusters you have
#Here there at 9 clusters (0, 1, 2, 3, 4, 5, 6, 7 and 8)
#I'm sure there's a better way but you have to make a line like below for each cluster

dfsample$`0` = bitr(dfsample$`0`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`1` = bitr(dfsample$`1`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`2` = bitr(dfsample$`2`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`3` = bitr(dfsample$`3`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`4` = bitr(dfsample$`4`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`5` = bitr(dfsample$`5`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`6` = bitr(dfsample$`6`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`7` = bitr(dfsample$`7`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
dfsample$`8` = bitr(dfsample$`8`, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")


#do the same here, a line like below for each cluster
genelist <- list("0" = dfsample$`0`$ENTREZID, 
                 "1" = dfsample$`1`$ENTREZID,
                 "2" = dfsample$`2`$ENTREZID,
                 "3" = dfsample$`3`$ENTREZID,
                 "4" = dfsample$`4`$ENTREZID,
                 "5" = dfsample$`5`$ENTREZID,
                 "6" = dfsample$`6`$ENTREZID,
                 "7" = dfsample$`7`$ENTREZID,
                 "8" = dfsample$`8`$ENTREZID

)
GOclusterplot <- compareCluster(geneCluster = genelist, fun = "enrichGO", OrgDb = "org.Hs.eg.db")
dotplot(GOclusterplot)

go

KEGGclusterplot <- compareCluster(geneCluster = genelist, fun = "enrichKEGG")
dotplot(KEGGclusterplot)

kegg

Note: the KEGG plot doesn’t show the cluster 8 for some reason. I’m thinking because it wasn’t enriched for KEGG terms? Cluster 8 was also a tiny cluster… (See UMAP) Regardless, a quick and dirty way to visualize the enriched GO & KEGG terms per cluster.

Good luck!

Read more here: Source link