Data Obtainted from Michlmayr et al., 2020 Citation: Michlmayr D, Kim EY, Rahman AH, et al. Comprehensive Immunoprofiling of Pediatric Zika Reveals Key Role for Monocytes in the Acute Phase and No Effect of Prior Dengue Virus Infection. Cell Rep. 2020;31(4):107569. doi:10.1016/j.celrep.2020.107569
library(DESeq2)
library(ggplot2)
library(AnnotationDbi)
library(org.Hs.eg.db)
library(EnhancedVolcano)
library(biomaRt)
library(dplyr)
library(tidyr)
Loaded STAR generated counts files and converted to count matrix.
# Load files in
file.list <- list.files(path = "./", pattern = "*ReadsPerGene.out.tab$")
# Form combined counts table
counts.files <- lapply(file.list, read.table, skip = 4)
# Select counts
counts <- as.data.frame(sapply(counts.files, function(x) x[ ,2]))
# Set Column and Row Names for Data Frame:
colnames(counts) <- file.list
row.names(counts) <- counts.files[[1]]$V1
# Generating Metadata File:
sampleTable <- read.csv("MetaData.csv", header = T)
head(sampleTable)
## SampleName Condition Sex
## 1 combined_1304q1_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Late Acute M
## 2 combined_1304q3_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Convalescent M
## 3 combined_1304q5_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Early Acute M
## 4 combined_2226q1_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Late Acute M
## 5 combined_2226q3_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Convalescent M
## 6 combined_2226q5_Homo_sapiens_RNA-Seq_ReadsPerGene.out.tab Early Acute M
## Batch
## 1 1
## 2 1
## 3 1
## 4 2
## 5 2
## 6 2
# DESeq Dataset Formation:
dds <- DESeqDataSetFromMatrix(countData = counts, colData = sampleTable, design = ~ Batch + Condition)
# Exploratory analysis and visualization
vsd <- vst(dds, blind=FALSE)
plotPCA(vsd, intgroup=c("Condition"))
plotPCA(vsd, intgroup=c("Condition", "Sex"))
# PCA Plot
pcaData <- plotPCA(vsd, intgroup=c("Condition", "Sex"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
pal <- c("Convalescent" = "coral2","Late Acute" = "steelblue3", "Early Acute" = "goldenrod")
ggplot(pcaData, aes(PC1, PC2, color=Condition, shape=Sex)) +
scale_shape_manual(values = c(1, 2)) +
geom_point(size=3.5, stroke = 1) +
xlab(paste0("PC1: ",round(percentVar[1]),"% variance")) +
ylab(paste0("PC2: ",round(percentVar[2]),"% variance")) +
scale_colour_manual(values = pal) +
theme_classic() +
theme(text = element_text(size = 20),
axis.line = element_line(size = 1.25),
axis.text = element_text(size = 20),
legend.text = element_text(size = 20))
output <- DESeq(dds)
res <- results(output)
head(res)
## log2 fold change (MLE): Condition Late.Acute vs Convalescent
## Wald test p-value: Condition Late.Acute vs Convalescent
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000223972 0.5212397 -0.311458 0.566372 -0.5499182 0.582375
## ENSG00000227232 0.8569198 0.532358 0.421231 1.2638134 0.206297
## ENSG00000278267 0.0227661 -0.191833 3.559880 -0.0538875 0.957025
## ENSG00000243485 0.1621659 -0.704778 1.067448 -0.6602458 0.509096
## ENSG00000284332 0.0000000 NA NA NA NA
## ENSG00000237613 0.0511964 -0.512422 2.479617 -0.2066536 0.836280
## padj
## <numeric>
## ENSG00000223972 NA
## ENSG00000227232 0.413751
## ENSG00000278267 NA
## ENSG00000243485 NA
## ENSG00000284332 NA
## ENSG00000237613 NA
summary(res)
##
## out of 57152 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 4313, 7.5%
## LFC < 0 (down) : 5055, 8.8%
## outliers [1] : 0, 0%
## low counts [2] : 23193, 41%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_EA <- results(output, contrast=c("Condition","Early Acute","Convalescent"))
results_LA <- results(output, contrast=c("Condition","Late Acute","Convalescent"))
# metadata on the columns in dataframe
mcols(results_EA, use.names = TRUE)
## DataFrame with 6 rows and 2 columns
## type description
## <character> <character>
## baseMean intermediate mean of normalized c..
## log2FoldChange results log2 fold change (ML..
## lfcSE results standard error: Cond..
## stat results Wald statistic: Cond..
## pvalue results Wald test p-value: C..
## padj results BH adjusted p-values
summary(results_EA)
##
## out of 57152 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 5409, 9.5%
## LFC < 0 (down) : 5573, 9.8%
## outliers [1] : 0, 0%
## low counts [2] : 24298, 43%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
summary(results_LA)
##
## out of 57152 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 4313, 7.5%
## LFC < 0 (down) : 5055, 8.8%
## outliers [1] : 0, 0%
## low counts [2] : 23193, 41%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sum(results_EA$padj < 0.05, na.rm=TRUE)
## [1] 9402
sum(results_LA$padj < 0.05, na.rm=TRUE)
## [1] 7642
#sorting results by p-value
results_EA_PValue <- results_EA[order(results_EA$padj),]
head(results_EA_PValue)
## log2 fold change (MLE): Condition Early Acute vs Convalescent
## Wald test p-value: Condition Early.Acute vs Convalescent
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000165949 1284.196 4.91005 0.256701 19.1275 1.48967e-81
## ENSG00000184979 440.018 4.32443 0.230384 18.7705 1.31659e-78
## ENSG00000088827 205.255 4.66127 0.254847 18.2904 9.86250e-75
## ENSG00000163666 126.094 4.93677 0.272141 18.1405 1.52669e-73
## ENSG00000135047 459.150 2.72090 0.150871 18.0346 1.04309e-72
## ENSG00000020577 390.774 3.03687 0.169359 17.9315 6.69771e-72
## padj
## <numeric>
## ENSG00000165949 4.89416e-77
## ENSG00000184979 2.16275e-74
## ENSG00000088827 1.08008e-70
## ENSG00000163666 1.25395e-69
## ENSG00000135047 6.85392e-69
## ENSG00000020577 3.66744e-68
write.csv(as.data.frame(results_EA_PValue),
file="EA_DGE.csv")
results_LA_PValue <- results_LA[order(results_LA$padj),]
head(results_LA_PValue)
## log2 fold change (MLE): Condition Late Acute vs Convalescent
## Wald test p-value: Condition Late.Acute vs Convalescent
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## ENSG00000165949 1284.1957 4.200682 0.2567111 16.3635 3.48734e-60
## ENSG00000094804 179.2281 2.368300 0.1549328 15.2860 9.48256e-53
## ENSG00000092853 196.8449 1.651375 0.1174889 14.0556 7.11872e-45
## ENSG00000198830 2948.6603 0.716984 0.0519326 13.8060 2.34343e-43
## ENSG00000161057 1585.8744 0.724995 0.0528618 13.7149 8.26544e-43
## ENSG00000164045 72.3662 2.471960 0.1817859 13.5982 4.10433e-42
## padj
## <numeric>
## ENSG00000165949 1.18426e-55
## ENSG00000094804 1.61009e-48
## ENSG00000092853 8.05815e-41
## ENSG00000198830 1.98952e-39
## ENSG00000161057 5.61372e-39
## ENSG00000164045 2.32298e-38
write.csv(as.data.frame(results_LA_PValue),
file="LA_DGE.csv")
# changing ENSEMBL IDs to Gene IDs:
EA_CONV <- read.table("EA_DGE.csv", header = TRUE, sep =',')
IDs <- c(EA_CONV$X)
EA_CONV$Symbol <- mapIds(org.Hs.eg.db, IDs, 'SYMBOL', 'ENSEMBL')
rownames(EA_CONV) <- EA_CONV$X
write.csv(EA_CONV, file="EA_DGE_genes.csv")
EA_DGE_genes <- read.table("EA_DGE_genes.csv", header = TRUE, sep =',')
LA_CONV <- read.table("LA_DGE.csv", header = TRUE, sep =',')
IDs <- c(LA_CONV$X)
LA_CONV$Symbol <- mapIds(org.Hs.eg.db, IDs, 'SYMBOL', 'ENSEMBL')
rownames(LA_CONV) <- LA_CONV$X
write.csv(LA_CONV, file="LA_DGE_genes.csv")
LA_DGE_genes <- read.table("LA_DGE_genes.csv", header = TRUE, sep =',')
# subset the results table to genes with padj < 0.05 and LFC greater than 2 and less than -2
Sig_EA_CONV <- subset(EA_DGE_genes, padj <= 0.05 & (log2FoldChange >=1.5 |log2FoldChange <= -1.5))
head(Sig_EA_CONV)
## X.1 X baseMean log2FoldChange lfcSE stat
## 1 ENSG00000165949 ENSG00000165949 1284.1957 4.910052 0.2567008 19.12753
## 2 ENSG00000184979 ENSG00000184979 440.0182 4.324429 0.2303844 18.77049
## 3 ENSG00000088827 ENSG00000088827 205.2553 4.661270 0.2548474 18.29043
## 4 ENSG00000163666 ENSG00000163666 126.0940 4.936768 0.2721409 18.14049
## 5 ENSG00000135047 ENSG00000135047 459.1496 2.720901 0.1508714 18.03457
## 6 ENSG00000020577 ENSG00000020577 390.7742 3.036865 0.1693594 17.93148
## pvalue padj Symbol
## 1 1.489671e-81 4.894165e-77 IFI27
## 2 1.316585e-78 2.162755e-74 USP18
## 3 9.862499e-75 1.080075e-70 SIGLEC1
## 4 1.526690e-73 1.253946e-69 HESX1
## 5 1.043088e-72 6.853925e-69 CTSL
## 6 6.697707e-72 3.667441e-68 SAMD4A
summary(Sig_EA_CONV)
## X.1 X baseMean log2FoldChange
## Length:465 Length:465 Min. : 0.92 Min. :-2.865
## Class :character Class :character 1st Qu.: 4.37 1st Qu.: 1.593
## Mode :character Mode :character Median : 28.27 Median : 1.898
## Mean : 1547.73 Mean : 1.815
## 3rd Qu.: 254.09 3rd Qu.: 2.467
## Max. :46934.39 Max. : 6.315
## lfcSE stat pvalue padj
## Min. :0.09287 Min. :-13.758 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.18543 1st Qu.: 5.540 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.25121 Median : 8.473 Median :0.000e+00 Median :0.000e+00
## Mean :0.27166 Mean : 7.946 Mean :1.061e-04 Mean :4.724e-04
## 3rd Qu.:0.33193 3rd Qu.: 12.168 3rd Qu.:3.000e-09 3rd Qu.:5.000e-08
## Max. :0.66911 Max. : 19.128 Max. :1.344e-02 Max. :4.749e-02
## Symbol
## Length:465
## Class :character
## Mode :character
##
##
##
write.csv(as.data.frame(Sig_EA_CONV), file="sig_EA_CONV.csv")
# Number of downregulated Genes:
sum(Sig_EA_CONV$log2FoldChange < 0, na.rm=TRUE)
## [1] 49
# Number of upregulated genes:
sum(Sig_EA_CONV$log2FoldChange > 0, na.rm=TRUE)
## [1] 416
Sig_LA_CONV <- subset(LA_DGE_genes, padj <= 0.05 & (log2FoldChange >=1.5 |log2FoldChange <= -1.5))
head(Sig_LA_CONV)
## X.1 X baseMean log2FoldChange lfcSE stat
## 1 ENSG00000165949 ENSG00000165949 1284.19567 4.200682 0.2567111 16.36346
## 2 ENSG00000094804 ENSG00000094804 179.22810 2.368300 0.1549328 15.28598
## 3 ENSG00000092853 ENSG00000092853 196.84487 1.651375 0.1174889 14.05559
## 6 ENSG00000164045 ENSG00000164045 72.36620 2.471960 0.1817859 13.59820
## 7 ENSG00000157456 ENSG00000157456 160.96677 2.264000 0.1681281 13.46592
## 8 ENSG00000085840 ENSG00000085840 71.05778 2.148829 0.1597606 13.45031
## pvalue padj Symbol
## 1 3.487336e-60 1.184265e-55 IFI27
## 2 9.482560e-53 1.610091e-48 CDC6
## 3 7.118720e-45 8.058154e-41 CLSPN
## 6 4.104327e-42 2.322981e-38 CDC25A
## 7 2.481993e-41 1.204086e-37 CCNB2
## 8 3.065898e-41 1.301435e-37 ORC1
summary(Sig_LA_CONV)
## X.1 X baseMean log2FoldChange
## Length:306 Length:306 Min. : 0.800 Min. :-2.6000
## Class :character Class :character 1st Qu.: 3.193 1st Qu.:-1.5311
## Mode :character Mode :character Median : 26.321 Median : 1.7304
## Mean : 332.933 Mean : 0.9331
## 3rd Qu.: 101.056 3rd Qu.: 2.0638
## Max. :25958.431 Max. : 4.2007
## lfcSE stat pvalue padj
## Min. :0.1175 Min. :-10.937 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:0.2158 1st Qu.: -2.943 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :0.2785 Median : 6.350 Median :0.000e+00 Median :0.000e+00
## Mean :0.3032 Mean : 4.365 Mean :3.159e-04 Mean :1.763e-03
## 3rd Qu.:0.3779 3rd Qu.: 9.075 3rd Qu.:9.210e-07 3rd Qu.:1.462e-05
## Max. :0.6662 Max. : 16.363 Max. :8.822e-03 Max. :4.128e-02
## Symbol
## Length:306
## Class :character
## Mode :character
##
##
##
write.csv(as.data.frame(Sig_LA_CONV), file="sig_LA_CONV.csv")
# Number of downregulated Genes:
sum(Sig_LA_CONV$log2FoldChange < 0, na.rm=TRUE)
## [1] 87
# Number of upregulated genes:
sum(Sig_LA_CONV$log2FoldChange > 0, na.rm=TRUE)
## [1] 219
# Volcano Plot
EA_DGE_Volc <- EnhancedVolcano(EA_DGE_genes,
lab = as.character(EA_DGE_genes$Symbol),
x = "log2FoldChange",
y = "padj",
selectLab = c('IFI27', 'CCL2', 'CCL8', 'CXCL10', 'OTOF', 'HESX1', 'CXCL11', 'IFIT1', 'ATF3', 'DEFB1'),
pCutoff = 0.05,
FCcutoff = 1.5,
labSize = 4,
axisLabSize = 12,
col=c('black', 'black', 'black', 'red3'),
legendLabels=c('Not sig.','Log2FC','padj', 'padj & Log2FC'),
legendPosition = 'right',
legendLabSize = 16,
legendIconSize = 5.0,
labFace = 'bold',
xlim = c(-10, 10),
ylim = c(0,80),
title = "Early Acute vs Convalescent",
subtitle = "Differential expression",
gridlines.major = FALSE,
gridlines.minor = FALSE)
print(EA_DGE_Volc)
ggsave("EA_DGE_Volc.png")
LA_DGE_Volc <- EnhancedVolcano(LA_DGE_genes,
lab = as.character(LA_DGE_genes$Symbol),
x = "log2FoldChange",
y = "padj",
selectLab = c('IFI27', 'SPP1', 'RNY3', 'ORC1', 'TSPAN5', 'ADAMTS5', 'MCM4', 'BIRC5', 'CDC6'),
pCutoff = 0.05,
FCcutoff = 1.5,
labSize = 4,
axisLabSize = 12,
col=c('black', 'black', 'black', 'red3'),
legendLabels=c('Not sig.','Log2FC','padj', 'padj & Log2FC'),
legendPosition = 'right',
legendLabSize = 16,
legendIconSize = 5.0,
labFace = 'bold',
xlim = c(-10, 10),
ylim = c(0,80),
title = "Late Acute vs Convalescent",
subtitle = "Differential expression",
gridlines.major = FALSE,
gridlines.minor = FALSE)
print(LA_DGE_Volc)
CCL2 <- plotCounts(dds,
gene="ENSG00000108691", intgroup = "Condition", main = expression(atop("Expression of "*italic("CCL2"))))
CCL8 <- plotCounts(dds,
gene="ENSG00000108700", intgroup = "Condition", main = expression(atop("Expression of "*italic("CCL8"))))
IFI27 <- plotCounts(dds,
gene="ENSG00000165949", intgroup = "Condition", main = expression(atop("Expression of "*italic("IFI27"))))
# ATF3
ATF3 <- plotCounts(dds,
gene="ENSG00000162772", intgroup = "Condition", main = expression(atop("Expression of "*italic("ATF3"))))
# CXCL10
CXCL10 <- plotCounts(dds,
gene="ENSG00000169245", intgroup = "Condition", main = expression(atop("Expression of "*italic("CXCL10"))))
DEFB1 <- plotCounts(dds,
gene="ENSG00000164825", intgroup = "Condition", main = expression(atop("Expression of "*italic("DEFB1"))))
OTOF <- plotCounts(dds,
gene="ENSG00000115155", intgroup = "Condition", main = expression(atop("Expression of "*italic("OTOF"))))
library(biomaRt)
library(clusterProfiler)
library(org.Hs.eg.db)
library(DOSE)
library(ggnewscale)
library(pathview)
library(ggupset)
library(enrichplot)
IDs <- c(EA_CONV$X)
EA_CONV$Symbol <- mapIds(org.Hs.eg.db, IDs, 'SYMBOL', 'ENSEMBL')
rownames(EA_CONV) <- EA_CONV$X
# Convert the row names to entrez ids
IDs <- c(EA_CONV$X)
EA_DGE_genes$ENTREZID <- mapIds(org.Hs.eg.db, IDs, 'ENTREZID', 'ENSEMBL')
IDs <- c(LA_CONV$X)
LA_DGE_genes$ENTREZID <- mapIds(org.Hs.eg.db, IDs, 'ENTREZID', 'ENSEMBL')
# Preparing Gene lists for GO ontology:
resdata <- EA_DGE_genes
head(resdata)
## X.1 X baseMean log2FoldChange lfcSE stat
## 1 ENSG00000165949 ENSG00000165949 1284.1957 4.910052 0.2567008 19.12753
## 2 ENSG00000184979 ENSG00000184979 440.0182 4.324429 0.2303844 18.77049
## 3 ENSG00000088827 ENSG00000088827 205.2553 4.661270 0.2548474 18.29043
## 4 ENSG00000163666 ENSG00000163666 126.0940 4.936768 0.2721409 18.14049
## 5 ENSG00000135047 ENSG00000135047 459.1496 2.720901 0.1508714 18.03457
## 6 ENSG00000020577 ENSG00000020577 390.7742 3.036865 0.1693594 17.93148
## pvalue padj Symbol ENTREZID
## 1 1.489671e-81 4.894165e-77 IFI27 3429
## 2 1.316585e-78 2.162755e-74 USP18 11274
## 3 9.862499e-75 1.080075e-70 SIGLEC1 6614
## 4 1.526690e-73 1.253946e-69 HESX1 8820
## 5 1.043088e-72 6.853925e-69 CTSL 1514
## 6 6.697707e-72 3.667441e-68 SAMD4A 23034
original_gene_list <- EA_DGE_genes$log2FoldChange
head(original_gene_list)
## [1] 4.910052 4.324429 4.661270 4.936768 2.720901 3.036865
names(original_gene_list) <- EA_DGE_genes$ENTREZID
head(original_gene_list)
## 3429 11274 6614 8820 1514 23034
## 4.910052 4.324429 4.661270 4.936768 2.720901 3.036865
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)
# Extract significant results (padj < 0.05) using subset()
sig_genes_df = subset(EA_DGE_genes, padj < 0.05)
# From significant results, we want to filter on log2fold change
genes <- sig_genes_df$log2FoldChange
# Name the vector i.e. add entrez gene id
names(genes) <- sig_genes_df$ENTREZID
# omit NA values
genes <- na.omit(genes)
# filter on min log2fold change (log2FoldChange > 1)
genes <- names(genes)[abs(genes) > 1]
go_enrich <- enrichGO(gene = genes,
universe = names(gene_list),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
readable = T,
ont = "BP",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
cluster_summary_EA <- data.frame(go_enrich)
write.csv(cluster_summary_EA, "clusterProfiler_EA.csv")
head(cluster_summary_EA)
## ID Description GeneRatio
## GO:0051607 GO:0051607 defense response to virus 63/603
## GO:0140546 GO:0140546 defense response to symbiont 63/603
## GO:0009615 GO:0009615 response to virus 71/603
## GO:0002831 GO:0002831 regulation of response to biotic stimulus 56/603
## GO:0019221 GO:0019221 cytokine-mediated signaling pathway 64/603
## GO:0050792 GO:0050792 regulation of viral process 38/603
## BgRatio pvalue p.adjust qvalue
## GO:0051607 264/17943 7.400488e-36 1.626627e-32 1.370259e-32
## GO:0140546 264/17943 7.400488e-36 1.626627e-32 1.370259e-32
## GO:0009615 361/17943 1.739970e-34 2.549636e-31 2.147794e-31
## GO:0002831 321/17943 1.372699e-24 1.508597e-21 1.270831e-21
## GO:0019221 457/17943 1.454214e-22 1.278545e-19 1.077037e-19
## GO:0050792 164/17943 1.639539e-21 1.201236e-18 1.011912e-18
## geneID
## GO:0051607 IFI27/RTP4/GBP3/CXCL10/OAS1/OAS2/IFI44L/IFIT1/MX1/IFIH1/IFI6/IFIT3/OASL/RSAD2/DDX60/PARP9/DHX58/TRIM22/NT5C3A/TRIM5/ZBP1/GBP1/STAT1/OAS3/STAT2/IFIT2/IFIT5/DTX3L/HERC5/PLSCR1/EIF2AK2/AIM2/SHFL/PML/TLR7/CGAS/DDX58/TRIM6/IFI16/ISG15/MOV10/BST2/MX2/IL15/LILRB1/ADAR/IFITM3/APOBEC3A/MICB/PMAIP1/IRF7/ISG20/ACOD1/IRF5/TLR3/RNASE2/IFITM1/IL27/UNC93B1/CXCL9/IFITM2/HTRA1/APOBEC3B
## GO:0140546 IFI27/RTP4/GBP3/CXCL10/OAS1/OAS2/IFI44L/IFIT1/MX1/IFIH1/IFI6/IFIT3/OASL/RSAD2/DDX60/PARP9/DHX58/TRIM22/NT5C3A/TRIM5/ZBP1/GBP1/STAT1/OAS3/STAT2/IFIT2/IFIT5/DTX3L/HERC5/PLSCR1/EIF2AK2/AIM2/SHFL/PML/TLR7/CGAS/DDX58/TRIM6/IFI16/ISG15/MOV10/BST2/MX2/IL15/LILRB1/ADAR/IFITM3/APOBEC3A/MICB/PMAIP1/IRF7/ISG20/ACOD1/IRF5/TLR3/RNASE2/IFITM1/IL27/UNC93B1/CXCL9/IFITM2/HTRA1/APOBEC3B
## GO:0009615 IFI27/RTP4/GBP3/CXCL10/OAS1/OAS2/IFI44/IFI44L/IFIT1/MX1/IFIH1/CCL8/IFI6/IFIT3/OASL/RSAD2/DDX60/PARP9/DHX58/TRIM22/NT5C3A/TRIM5/ZBP1/GBP1/STAT1/OAS3/STAT2/IFIT2/IFIT5/DTX3L/HERC5/PLSCR1/EIF2AK2/NMI/AIM2/SHFL/PML/TLR7/CGAS/DDX58/TRIM6/IFI16/ISG15/MOV10/BST2/MX2/IL15/LILRB1/ADAR/IFITM3/NPC2/APOBEC3A/MICB/PMAIP1/BATF3/IRF7/ISG20/LGALS9/ACOD1/IRF5/TLR3/RNASE2/IFITM1/IL27/UNC93B1/SRC/CXCL9/IFITM2/HTRA1/APOBEC3B/CXCL12
## GO:0002831 USP18/OAS1/IFIT1/OASL/DDX60/PARP9/DHX58/TRIM5/ZBP1/STAT1/OAS3/STAT2/DTX3L/HERC5/PLSCR1/NMI/AIM2/SERPING1/CGAS/DDX58/TRIM6/TASL/IFI16/IFI35/SCIMP/CARD16/ISG15/IL15/TRAFD1/HAVCR2/LILRB1/ADAR/MICB/PARP14/CLEC6A/SOCS1/CD274/SASH1/TRIM21/GBP5/CEACAM1/IRF7/FCGR2B/FFAR2/LGALS9/ACOD1/IL27/LY96/SRC/CXCL6/GRN/IL4I1/LAG3/KIR2DL4/EREG/HTRA1
## GO:0019221 IFI27/USP18/CCL2/CXCL10/OAS1/OAS2/MX1/CCL8/CXCL11/OASL/PARP9/ZBP1/STAT1/CCRL2/OAS3/STAT2/NMI/AIM2/IL1RN/TRIM6/LILRB4/CARD16/ISG15/CASP1/IL15/FCER1G/LILRB1/JAK2/ADAR/IFITM3/TNFSF13B/CMKLR1/PARP14/CCR1/SOCS1/AXL/CEACAM1/IRF7/IRF5/HSPA1B/IFITM1/CLDN18/KIT/PRLR/LILRA6/LILRA5/IL31RA/FOXC1/CSF1R/SRC/CXCR5/IL5RA/CXCL9/OSM/CXCL6/EGR1/CCL7/IFITM2/CCL1/EREG/CCL23/CXCL12/LILRB5/CCL25
## GO:0050792 OAS1/OAS2/IFIT1/MX1/IFIH1/OASL/RSAD2/TRIM22/TRIM5/STAT1/OAS3/TRIM38/IFIT5/PLSCR1/EIF2AK2/DYNLT1/SHFL/PML/TRIM14/TRIM6/LAMP3/IFI16/ISG15/BST2/ADAR/IFITM3/APOBEC3A/LY6E/AXL/TRIM21/ISG20/LGALS9/IFITM1/PARP10/IFITM2/APOBEC3B/TMPRSS2/CLEC4G
## Count
## GO:0051607 63
## GO:0140546 63
## GO:0009615 71
## GO:0002831 56
## GO:0019221 64
## GO:0050792 38
# Repeat for Late Acute DGE:
resdata <- LA_DGE_genes
head(resdata)
## X.1 X baseMean log2FoldChange lfcSE stat
## 1 ENSG00000165949 ENSG00000165949 1284.1957 4.2006821 0.25671110 16.36346
## 2 ENSG00000094804 ENSG00000094804 179.2281 2.3683003 0.15493284 15.28598
## 3 ENSG00000092853 ENSG00000092853 196.8449 1.6513752 0.11748886 14.05559
## 4 ENSG00000198830 ENSG00000198830 2948.6603 0.7169841 0.05193262 13.80605
## 5 ENSG00000161057 ENSG00000161057 1585.8744 0.7249951 0.05286177 13.71492
## 6 ENSG00000164045 ENSG00000164045 72.3662 2.4719603 0.18178590 13.59820
## pvalue padj Symbol ENTREZID
## 1 3.487336e-60 1.184265e-55 IFI27 3429
## 2 9.482560e-53 1.610091e-48 CDC6 990
## 3 7.118720e-45 8.058154e-41 CLSPN 63967
## 4 2.343434e-43 1.989516e-39 HMGN2 3151
## 5 8.265444e-43 5.613724e-39 PSMC2 5701
## 6 4.104327e-42 2.322981e-38 CDC25A 993
original_gene_list <- LA_DGE_genes$log2FoldChange
head(original_gene_list)
## [1] 4.2006821 2.3683003 1.6513752 0.7169841 0.7249951 2.4719603
names(original_gene_list) <- LA_DGE_genes$ENTREZID
head(original_gene_list)
## 3429 990 63967 3151 5701 993
## 4.2006821 2.3683003 1.6513752 0.7169841 0.7249951 2.4719603
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)
# Extract significant results (padj < 0.05) using subset()
sig_genes_df = subset(LA_DGE_genes, padj < 0.05)
# From significant results, we want to filter on log2fold change
genes <- sig_genes_df$log2FoldChange
# Name the vector i.e. add entrez gene id
names(genes) <- sig_genes_df$ENTREZID
# omit NA values
genes <- na.omit(genes)
# filter on min log2fold change (log2FoldChange > 1)
genes <- names(genes)[abs(genes) > 1]
go_enrich_LA <- enrichGO(gene = genes,
universe = names(gene_list),
OrgDb = org.Hs.eg.db,
keyType = 'ENTREZID',
readable = T,
ont = "BP",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
cluster_summary_LA <- data.frame(go_enrich_LA)
write.csv(cluster_summary_EA, "clusterProfiler_LA.csv")
head(cluster_summary_LA)
## ID
## GO:0002377 GO:0002377
## GO:0006958 GO:0006958
## GO:0006910 GO:0006910
## GO:0002455 GO:0002455
## GO:0006956 GO:0006956
## GO:0002440 GO:0002440
## Description
## GO:0002377 immunoglobulin production
## GO:0006958 complement activation, classical pathway
## GO:0006910 phagocytosis, recognition
## GO:0002455 humoral immune response mediated by circulating immunoglobulin
## GO:0006956 complement activation
## GO:0002440 production of molecular mediator of immune response
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:0002377 67/502 207/17943 7.564539e-53 2.537146e-49 2.418264e-49
## GO:0006958 47/502 96/17943 2.104977e-47 3.530046e-44 3.364639e-44
## GO:0006910 45/502 88/17943 1.527323e-46 1.707547e-43 1.627537e-43
## GO:0002455 48/502 109/17943 1.440340e-45 1.207725e-42 1.151135e-42
## GO:0006956 48/502 118/17943 1.577809e-43 1.058394e-40 1.008801e-40
## GO:0002440 68/502 299/17943 1.948242e-42 1.089067e-39 1.038037e-39
## geneID
## GO:0002377 EXO1/XBP1/IGKV3-15/IGKV1-5/IGKC/IGKV1-39/IGKV4-1/IGKV1D-39/IGLV6-57/IGKV1-16/IGLV2-14/IGKV1-27/IGKV3-20/IGLV1-44/IGKV3D-15/IGLV3-10/IGKV2-24/MZB1/IGKV1-8/IGLV2-23/IGLV3-25/IGKV2-30/IGLV2-11/IGKV3D-11/IGLV1-40/IGLV1-47/IGLV3-19/IGKV1D-33/IGKV3D-20/IGLV8-61/IGKV1-12/IGKV2-28/IGKV5-2/IGKV2D-28/IGKV1-13/IGLV2-18/IGLV3-9/IGLV3-16/IGLV3-22/IGLV7-43/IGKV1D-13/IGKV1D-12/IGLV3-21/IGLV5-48/IGLV9-49/IGKV2D-30/IGLV5-45/IGKV1-9/IGLV2-8/IGKV1-17/IGKV2D-24/IGLV7-46/IGKV1D-43/IGKV6D-21/IGKV2D-26/IGLV1-36/IGKV3-7/IGLV4-60/IGKV1-6/IL13RA2/IGLV3-27/IGLV4-3/IGKV6-21/IGKV3D-7/IGKV2-29/IGKV1D-17/IGLV5-37
## GO:0006958 IGHV3-7/IGHV3-74/IGKC/IGHV4-39/IGHV3-53/IGLC2/IGHV3-11/IGHV6-1/IGHV3-21/IGHV3-48/IGHV4-59/IGHV3-23/IGHV3-66/IGHV3-72/IGHM/IGHV3-20/IGHV2-5/IGHV5-51/IGHV3-64/C1QB/IGHV3-49/IGHV3-15/IGHV3-30/IGHV3-43/IGHV3-33/IGHG1/IGHV1-18/IGHV3-73/IGHV3-13/IGHA1/IGLC3/IGHV2-26/IGHV4-61/IGHV2-70D/C2/IGHV2-70/IGHV4-34/IGHV4-28/IGHV4-4/C1QA/IGHV1-3/IGHV1-69D/IGHG2/IGHV1-58/IGHV1-69/IGHV3-64D/IGHA2
## GO:0006910 IGHV3-7/IGHV3-74/IGKC/IGHV4-39/IGHV3-53/IGLC2/IGHV3-11/IGHV6-1/IGHV3-21/IGHV3-48/IGHV4-59/IGHV3-23/IGHV3-66/IGHV3-72/IGHM/IGHV3-20/IGHV2-5/IGHV5-51/IGHV3-64/IGHV3-49/IGHV3-15/IGHV3-30/IGHV3-43/IGHV3-33/IGHG1/IGHV1-18/IGHV3-73/IGHV3-13/IGHA1/IGLC3/IGHV2-26/IGHV4-61/IGHV2-70D/IGHV2-70/IGHV4-34/IGHV4-28/IGHV4-4/COLEC12/IGHV1-3/IGHV1-69D/IGHG2/IGHV1-58/IGHV1-69/IGHV3-64D/IGHA2
## GO:0002455 EXO1/IGHV3-7/IGHV3-74/IGKC/IGHV4-39/IGHV3-53/IGLC2/IGHV3-11/IGHV6-1/IGHV3-21/IGHV3-48/IGHV4-59/IGHV3-23/IGHV3-66/IGHV3-72/IGHM/IGHV3-20/IGHV2-5/IGHV5-51/IGHV3-64/C1QB/IGHV3-49/IGHV3-15/IGHV3-30/IGHV3-43/IGHV3-33/IGHG1/IGHV1-18/IGHV3-73/IGHV3-13/IGHA1/IGLC3/IGHV2-26/IGHV4-61/IGHV2-70D/C2/IGHV2-70/IGHV4-34/IGHV4-28/IGHV4-4/C1QA/IGHV1-3/IGHV1-69D/IGHG2/IGHV1-58/IGHV1-69/IGHV3-64D/IGHA2
## GO:0006956 IGHV3-7/IGHV3-74/IGKC/IGHV4-39/IGHV3-53/IGLC2/IGHV3-11/IGHV6-1/IGHV3-21/IGHV3-48/IGHV4-59/IGHV3-23/IGHV3-66/IGHV3-72/IGHM/IGHV3-20/KRT1/IGHV2-5/IGHV5-51/IGHV3-64/C1QB/IGHV3-49/IGHV3-15/IGHV3-30/IGHV3-43/IGHV3-33/IGHG1/IGHV1-18/IGHV3-73/IGHV3-13/IGHA1/IGLC3/IGHV2-26/IGHV4-61/IGHV2-70D/C2/IGHV2-70/IGHV4-34/IGHV4-28/IGHV4-4/C1QA/IGHV1-3/IGHV1-69D/IGHG2/IGHV1-58/IGHV1-69/IGHV3-64D/IGHA2
## GO:0002440 EXO1/XBP1/IGKV3-15/IGKV1-5/IGKC/IGKV1-39/IGKV4-1/IGKV1D-39/IGLV6-57/IGKV1-16/IGLV2-14/IGKV1-27/IGKV3-20/IGLV1-44/IGKV3D-15/IGLV3-10/IGKV2-24/MZB1/IGKV1-8/IGLV2-23/IGLV3-25/IGKV2-30/IGLV2-11/IGKV3D-11/IGLV1-40/IGLV1-47/IGLV3-19/IGKV1D-33/IGKV3D-20/IGLV8-61/IGKV1-12/IGKV2-28/IGKV5-2/IGKV2D-28/IGKV1-13/IGLV2-18/IGLV3-9/IGLV3-16/IGLV3-22/IGLV7-43/IGKV1D-13/IGKV1D-12/IGLV3-21/IGLV5-48/IGLV9-49/IGKV2D-30/IGLV5-45/IGKV1-9/IGLV2-8/IGKV1-17/RSAD2/IGKV2D-24/IGLV7-46/IGKV1D-43/IGKV6D-21/IGKV2D-26/IGLV1-36/IGKV3-7/IGLV4-60/IGKV1-6/IL13RA2/IGLV3-27/IGLV4-3/IGKV6-21/IGKV3D-7/IGKV2-29/IGKV1D-17/IGLV5-37
## Count
## GO:0002377 67
## GO:0006958 47
## GO:0006910 45
## GO:0002455 48
## GO:0006956 48
## GO:0002440 68
EA_GObarplot <- barplot(go_enrich, showCategory = 10,
title = "Early Acute GO Biological Pathways",
font.size = 8)
EA_GObarplot
dot <- dotplot(go_enrich)
dot
LA_GObarplot <- barplot(go_enrich_LA, showCategory = 10,
title = "Late Acute GO Biological Pathways",
font.size = 8)
LA_GObarplot
dot2 <- dotplot(go_enrich_LA)
dot2
# heatmap:
# library("pheatmap")
# top_genes <- EA_DGE_genes$Symbol[1:10]
#
# rld <- rlog(dds, blind=FALSE)
# mat <- assay(rld)[topgenes,]
# mat <- mat - rowMeans(mat)
# rownames(mat) <- topgenes_symbols$Symbol
# df <- as.data.frame(colData(dds)[,c("condition")])
# colnames(df) <- "Condition"
# row.names(df) <- sampleTable$sampleName
# pheatmap(mat, annotation_col=df, cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=FALSE)
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] enrichplot_1.14.2 ggupset_0.3.0
## [3] pathview_1.34.0 ggnewscale_0.4.8
## [5] DOSE_3.20.1 clusterProfiler_4.2.2
## [7] tidyr_1.3.0 dplyr_1.1.2
## [9] biomaRt_2.50.3 EnhancedVolcano_1.12.0
## [11] ggrepel_0.9.3 org.Hs.eg.db_3.14.0
## [13] AnnotationDbi_1.56.2 ggplot2_3.4.2
## [15] DESeq2_1.34.0 SummarizedExperiment_1.24.0
## [17] Biobase_2.54.0 MatrixGenerics_1.6.0
## [19] matrixStats_0.63.0 GenomicRanges_1.46.1
## [21] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [23] S4Vectors_0.32.4 BiocGenerics_0.40.0
##
## loaded via a namespace (and not attached):
## [1] shadowtext_0.1.2 fastmatch_1.1-3 BiocFileCache_2.2.1
## [4] systemfonts_1.0.4 plyr_1.8.8 igraph_1.4.2
## [7] lazyeval_0.2.2 splines_4.1.2 BiocParallel_1.28.3
## [10] digest_0.6.31 yulab.utils_0.0.6 htmltools_0.5.5
## [13] GOSemSim_2.20.0 viridis_0.6.2 GO.db_3.14.0
## [16] fansi_1.0.4 magrittr_2.0.3 memoise_2.0.1
## [19] Biostrings_2.62.0 annotate_1.72.0 graphlayouts_0.8.4
## [22] extrafont_0.19 extrafontdb_1.0 prettyunits_1.1.1
## [25] colorspace_2.1-0 blob_1.2.4 rappdirs_0.3.3
## [28] textshaping_0.3.6 xfun_0.39 crayon_1.5.2
## [31] RCurl_1.98-1.12 jsonlite_1.8.4 graph_1.72.0
## [34] scatterpie_0.1.9 genefilter_1.76.0 ape_5.7-1
## [37] survival_3.5-5 glue_1.6.2 polyclip_1.10-4
## [40] gtable_0.3.3 zlibbioc_1.40.0 XVector_0.34.0
## [43] DelayedArray_0.20.0 proj4_1.0-12 Rgraphviz_2.38.0
## [46] Rttf2pt1_1.3.12 maps_3.4.1 scales_1.2.1
## [49] DBI_1.1.3 Rcpp_1.0.10 viridisLite_0.4.1
## [52] xtable_1.8-4 progress_1.2.2 tidytree_0.4.2
## [55] gridGraphics_0.5-1 bit_4.0.5 httr_1.4.5
## [58] fgsea_1.20.0 RColorBrewer_1.1-3 pkgconfig_2.0.3
## [61] XML_3.99-0.14 farver_2.1.1 sass_0.4.5
## [64] dbplyr_2.3.2 locfit_1.5-9.7 utf8_1.2.3
## [67] ggplotify_0.1.0 tidyselect_1.2.0 labeling_0.4.2
## [70] rlang_1.1.0 reshape2_1.4.4 munsell_0.5.0
## [73] tools_4.1.2 cachem_1.0.7 downloader_0.4
## [76] cli_3.6.1 generics_0.1.3 RSQLite_2.3.1
## [79] evaluate_0.20 stringr_1.5.0 fastmap_1.1.1
## [82] yaml_2.3.7 ragg_1.2.5 ggtree_3.2.1
## [85] knitr_1.42 bit64_4.0.5 tidygraph_1.2.3
## [88] purrr_1.0.1 KEGGREST_1.34.0 ggraph_2.1.0
## [91] nlme_3.1-162 ash_1.0-15 ggrastr_1.0.1
## [94] KEGGgraph_1.54.0 aplot_0.1.10 DO.db_2.9
## [97] xml2_1.3.3 compiler_4.1.2 rstudioapi_0.14
## [100] beeswarm_0.4.0 filelock_1.0.2 curl_5.0.0
## [103] png_0.1-8 treeio_1.18.1 tibble_3.2.1
## [106] tweenr_2.0.2 geneplotter_1.72.0 bslib_0.4.2
## [109] stringi_1.7.12 highr_0.10 ggalt_0.4.0
## [112] lattice_0.21-8 Matrix_1.4-1 vctrs_0.6.2
## [115] pillar_1.9.0 lifecycle_1.0.3 jquerylib_0.1.4
## [118] data.table_1.14.8 bitops_1.0-7 patchwork_1.1.2
## [121] qvalue_2.26.0 R6_2.5.1 KernSmooth_2.23-20
## [124] gridExtra_2.3 vipor_0.4.5 MASS_7.3-58.1
## [127] withr_2.5.0 GenomeInfoDbData_1.2.7 parallel_4.1.2
## [130] hms_1.1.3 ggfun_0.0.9 grid_4.1.2
## [133] rmarkdown_2.21 ggforce_0.4.1 ggbeeswarm_0.7.1