Setup the default knitr options for the R code sections below.
Load the packages used throughout this analysis.
setwd("/share/users/Mike/AutismPaper/RNASeq/R")
suppressMessages({
library('kableExtra')
library('formattable')
library('goseq')
library('readr')
library('stringr')
library('tibble')
library('dplyr')
library('tidyr')
library('VennDiagram')
library('tximport')
library('DESeq2')
library('regionReport')
library('ggplot2')
library('BiocParallel')
library('RColorBrewer')
library('colorspace')
library('dendextend')
library('gplots')
library('superheat')
library('GGally')
library('corrplot')
library('pheatmap')
library('DT')
library('devtools')
library('VennDiagram')
library('ggrepel')
library('gridExtra')
library('vsn')
library('MASS')
library('plotly')
library('htmlwidgets')
register(MulticoreParam(detectCores()))
options(tibble.print_max=100)
})
This section assembles the metadata for the datasets included in this analysis.
meta_int <- read_tsv('../../metadata/RNASeq.internal.tsv') %>%
mutate(library = 'paired end') %>%
mutate(readlength = 101) %>%
mutate(tissue = 'SVZ') %>%
mutate(path = paste0('/share/users/Mike/AutismPaper/RNASeq/OurData/',
sample, '.refseqnorrna.kallisto.paired/abundance.h5')
)
meta_ext <- read_tsv('../../metadata/RNASeq.external.tsv') %>%
mutate(library = 'single end') %>%
mutate(readlength = 75) %>%
mutate(condition = 'Control') %>%
mutate(path = paste0('/share/users/Mike/AutismPaper/RNASeq/external/',
sample, '.refseqnorrna.kallisto.single/abundance.h5')
)
meta_ext1 <-read_tsv('../../metadata/RNASeq.external2.tsv') %>%
mutate(path = paste0('/share/users/Mike/AutismPaper/RNASeq/external2/',
sample, '.refseqnorrna.kallisto.single/abundance.h5')
)
metadata <- bind_rows(meta_int, meta_ext, meta_ext1) %>%
mutate(age = factor(if_else(!is.na(age_wpc), 'Fetal',
if_else(between(age_years,1,19),'Child','Adult')),
levels = c('Fetal', 'Child', 'Adult')
)
) %>%
mutate(tissue1 = if_else(grepl('SVZ$', tissue), 'SVZ',
if_else(grepl('frontal', tissue), 'FC',
if_else(grepl('temporal', tissue), 'TC',
tissue)
)
)
) %>%
mutate(sample = if_else(grepl('OurData', path),
paste(sep="_",sub("(\\w).*", "\\1", condition),
paste0(age_years, 'y'),
sample),
if_else(grepl('external2',path),
paste(sep='_', sub("(\\w).*", "\\1", condition),
tissue1,
paste0(age_years, 'y'),
sub(".*(\\d{2})$", "\\1", sample)
),
paste(sep='_', tissue,
paste0(age_wpc, 'w'),
sub(".*(\\d{2})$", "\\1", sample)
)
)
)
) %>%
filter(is.na(sex) | sex == 'M')
Select the metadata corresponding to the SVZ tissue samples:
metadata1 <- metadata %>%
filter(grepl('SVZ',tissue) & !grepl('H16$', sample)) %>%
mutate(condition = factor(condition, levels=c('Control', 'Autism'))) %>%
mutate(grp = factor(paste0(age,condition), levels=c('FetalControl', 'ChildControl', 'ChildAutism', 'AdultControl', 'AdultAutism'))) %>%
arrange(sample) %>%
dplyr::select( c('sample', 'condition', 'library', 'readlength', 'tissue', 'age', 'grp', 'path'))
metadata1
Read the files with the kallisto estimated counts for each of the transcripts in RefSeq version 108 (http://www.ncbi.nlm.nih.gov/genome/annotation_euk/Homo_sapiens/108/).
files <- metadata1 %>% dplyr::select('sample', 'path') %>% spread('sample', 'path') %>% as.list() %>% unlist()
txi.kallisto <- tximport(files, type = "kallisto", txOut = TRUE)
metadata1.df <- metadata1 %>% column_to_rownames(var="sample") %>% as.data.frame()
rownames(txi.kallisto$counts) <- as.character(rownames(txi.kallisto$counts))
Read the transcript-to-genes metadata table that associates RefSeq accessions (e.g. NM_xxxxxx) to Entrez Gene IDs, symbols, etc.
ttgfile <- '/share/db/refseq/human/refseq.108.rna.t2g.txt'
ttg <- read_tsv(ttgfile)
grp) with a baseline level corresponding to the Control condition and Fetal age group.dds <- DESeqDataSetFromTximport(txi = txi.kallisto, colData = metadata1.df, design = ~grp)
c <- counts(dds) %>% as.tibble()
c[['target_id']] <- rownames(counts(dds))
c <- c %>% mutate(sum1 = rowSums(dplyr::select(., contains("SVZ"))),
sum2 = rowSums(dplyr::select(., starts_with("A_"))),
sum3 = rowSums(dplyr::select(., starts_with("C_"))))
# n1 <- colnames(c) %>% as.tibble() %>% filter(grepl('SVZ', value)) %>% summarise(n()) %>% as.numeric()
# n2 <- colnames(c) %>% as.tibble() %>% filter(grepl('A_', value)) %>% summarise(n()) %>% as.numeric()
# n3 <- colnames(c) %>% as.tibble() %>% filter(grepl('C_', value)) %>% summarise(n()) %>% as.numeric()
c <- c %>% filter((sum1 > 1) & (sum2 > 1) & (sum3 > 1))
dds <- dds[c$target_id,]
rm('c')
ddsWald <- DESeq(dds, test = "Wald", betaPrior = TRUE, parallel = TRUE)
resultsNames(ddsWald)
## [1] "Intercept" "grpFetalControl" "grpChildControl" "grpChildAutism"
## [5] "grpAdultControl" "grpAdultAutism"
Extract the results from the model.
resultsChild <- results(ddsWald, alpha = 0.05,
contrast = c('grp', 'ChildControl', 'FetalControl'))
resultsAdult <- results(ddsWald, alpha = 0.05,
contrast = c('grp', 'AdultControl', 'FetalControl'))
resultsChildA <- results(ddsWald, alpha = 0.05,
contrast = c('grp', 'ChildAutism', 'FetalControl'))
resultsAdultA <- results(ddsWald, alpha = 0.05,
contrast = c('grp', 'AdultAutism', 'FetalControl'))
resultsChildCA <- results(ddsWald, alpha = 0.05,
contrast = c('grp','ChildControl','ChildAutism'))
resultsAdultCA <- results(ddsWald, alpha = 0.05,
contrast = c('grp','AdultControl','AdultAutism'))
# From the DESeq2 vignette:
#
# "Regarding multiple test correction, if a user is planning to contrast all pairs of many levels
# and then selectively reporting the results of only a subset of those pairs, one needs to perform multiple testing across contrasts
# as well as genes to control for this additional form of multiple testing.
# This can be done by using the p.adjust function across a long vector of p values from all pairs of contrasts,
# then re-assigning these adjusted p values to the appropriate results table."
# pChild <- resultsChild %>% as.tibble() %>% rownames_to_column(var='target_id') %>%
# dplyr::select(c('target_id', 'pvalue')) %>% spread('target_id', 'pvalue') %>%
# as.list() %>% unlist()
# pAdult <- resultsAdult %>% as.tibble() %>% rownames_to_column(var='target_id') %>%
# dplyr::select(c('target_id', 'pvalue')) %>% spread('target_id', 'pvalue') %>%
# as.list() %>% unlist()
signifChild <- resultsChild %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdult <- resultsAdult %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifChildA <- resultsChildA %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdultA <- resultsAdultA %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifChildCA <- resultsChildCA %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdultCA <- resultsAdultCA %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
devChildCA <- intersect(signifChild$target_id, signifChildCA$target_id)
devAdultCA <- intersect(signifAdult$target_id, signifAdultCA$target_id)
devChildCAg <- tibble(target_id=devChildCA) %>% left_join(ttg, by='target_id') %>%
dplyr::select('gene_id') %>% as.list() %>% unlist() %>% unique()
devAdultCAg <- tibble(target_id=devAdultCA) %>% left_join(ttg, by='target_id') %>%
dplyr::select('gene_id') %>% as.list() %>% unlist() %>% unique()
devChildCAs <- tibble(target_id=devChildCA) %>% left_join(ttg, by='target_id') %>%
dplyr::select('symbol') %>% as.list() %>% unlist() %>% unique()
devAdultCAs <- tibble(target_id=devAdultCA) %>% left_join(ttg, by='target_id') %>%
dplyr::select('symbol') %>% as.list() %>% unlist() %>% unique()
Summary of the number of transcripts and genes differentially expressed in control samples of different age groups vs. the fetal samples (“developmental genes”):
rsum_control_fetal <- tribble( ~Counts, ~Child, ~Adult,
'Transcripts',
length(signifChild$target_id),
length(signifAdult$target_id),
'Genes',
length(unique(signifChild$gene_id)),
length(unique(signifAdult$gene_id))
)
kable(rsum_control_fetal, format = 'markdown')
| Counts | Child | Adult |
|---|---|---|
| Transcripts | 12258 | 14368 |
| Genes | 9046 | 10052 |
Venn diagram:
grid.draw(venn.diagram(list(Child=signifChild$target_id, Adult=signifAdult$target_id),
filename = NULL,
alpha = 0.5,
inverted = TRUE,
cat.pos = c(45, 315),
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
)
)
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the fetal samples:
rsum_autism_fetal <- tribble( ~Counts, ~Child, ~Adult,
'Transcripts',
length(signifChildA$target_id),
length(signifAdultA$target_id),
'Genes',
length(unique(signifChildA$gene_id)),
length(unique(signifAdultA$gene_id))
)
kable(rsum_autism_fetal, format = 'markdown')
| Counts | Child | Adult |
|---|---|---|
| Transcripts | 14570 | 13832 |
| Genes | 10335 | 9870 |
Venn diagram:
grid.draw(venn.diagram(list(Child=signifChildA$target_id, Adult=signifAdultA$target_id),
filename = NULL,
alpha = 0.5,
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
)
)
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the same age control samples:
rsum_autism_control <- tribble( ~Counts, ~Child, ~Adult,
'Transcripts',
length(signifChildCA$target_id),
length(signifAdultCA$target_id),
'Genes',
length(unique(signifChildCA$gene_id)),
length(unique(signifAdultCA$gene_id))
)
kable(rsum_autism_control, format = 'markdown')
| Counts | Child | Adult |
|---|---|---|
| Transcripts | 21 | 46 |
| Genes | 21 | 44 |
Venn diagram:
grid.draw(venn.diagram(list(Child=signifChildCA$target_id, Adult=signifAdultCA$target_id),
rotation.degree = 180,
filename = NULL,
alpha = 0.5,
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
)
)
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the same age control samples AND also different in the control age group vs. the fetal samples (“developmental”):
rsum_autism_control_fetal <- tribble( ~Counts, ~Child, ~Adult,
'Transcripts',
length(devChildCA),
length(devAdultCA),
'Genes',
length(devChildCAg),
length(devAdultCAg)
)
kable(rsum_autism_control_fetal, format = 'markdown')
| Counts | Child | Adult |
|---|---|---|
| Transcripts | 16 | 37 |
| Genes | 16 | 35 |
Venn diagram:
grid.draw(venn.diagram(list(Child=devChildCA, Adult=devAdultCA),
filename = NULL,
rotation.degree = 180,
alpha = 0.5,
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
)
)
## Transform count data
intgroup = "grp"
rld <- rlog(ddsWald, blind = FALSE)
vst <- varianceStabilizingTransformation(ddsWald, blind=FALSE)
## Perform PCA analysis and get percent of variance explained
data_pca <- plotPCA(rld, intgroup = intgroup, ntop = 1000, returnData = TRUE)
percentVar <- round(100 * attr(data_pca, "percentVar"))
## Make plot
data_pca %>%
ggplot(aes_string(x = "PC1", y = "PC2", color = "grp")) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
coord_fixed() +
geom_text(aes(label=name), nudge_x = 1, hjust = 0, size = 3, check_overlap = FALSE)
The above plot shows the first two principal components that explain the variability in the data using the regularized log count data. If you are unfamiliar with principal component analysis, you might want to check the Wikipedia entry or this interactive explanation. In this case, the first and second principal component explain percentVar[1] and percentVar[2] percent of the variance respectively.
## Obtain the sample euclidean distances
sampleDists <- dist(t(assay(rld)))
sampleDistMatrix <- as.matrix(sampleDists)
## Add names based on intgroup
rownames(sampleDistMatrix) <- rownames(colData(rld))
colnames(sampleDistMatrix) <- NULL
## Define colors to use for the heatmap if none were supplied
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists, color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsChild, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdult, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsChildA, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdultA, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsChildCA, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdultCA, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
This plot shows a histogram of the unadjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples. The shape depends on the percent of features that are differentially expressed. For further information on how to interpret a histogram of p-values check David Robinson’s post on this topic.
## P-value histogram plot
resultsChild %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsChild %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0552 0.3419 0.3862 0.6726 1.0000
## P-value histogram plot
resultsAdult %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsAdult %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.03371 0.29400 0.36157 0.64621 0.99994
This plot shows a histogram of the unadjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples. The shape depends on the percent of features that are differentially expressed. For further information on how to interpret a histogram of p-values check David Robinson’s post on this topic.
## P-value histogram plot
resultsChildA %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsChildA %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.03463 0.30691 0.36608 0.64983 0.99995
## P-value histogram plot
resultsAdultA %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsAdultA %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.03664 0.30336 0.36591 0.65259 1.00000
This plot shows a histogram of the unadjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples. The shape depends on the percent of features that are differentially expressed. For further information on how to interpret a histogram of p-values check David Robinson’s post on this topic.
## P-value histogram plot
resultsChildCA %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsChildCA %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000001 0.4699726 0.6940816 0.6433850 0.8584663 0.9999953
## P-value histogram plot
resultsAdultCA %>% as.tibble() %>%
filter(!is.na(pvalue)) %>%
ggplot(aes(x = pvalue)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title='Histogram of unadjusted p-values') +
xlab('Unadjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,20000))
This is the numerical summary of the distribution of the p-values.
## P-value distribution summary
res <- resultsAdultCA %>%
as.tibble() %>%
filter(!is.na(pvalue))
summary(res$pvalue)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.4639 0.6936 0.6421 0.8596 1.0000
This table shows the number of features with p-values less or equal than some commonly used cutoff values.
## Split features by different p-value cutoffs
pval_table <- resultsChild %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsChild$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if (outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 6044 |
| 0.0010 | 8255 |
| 0.0100 | 12463 |
| 0.0250 | 15155 |
| 0.0500 | 18142 |
| 0.1000 | 22538 |
| 0.2000 | 29191 |
| 0.3000 | 35051 |
| 0.4000 | 40621 |
| 0.5000 | 46145 |
| 0.6000 | 51844 |
| 0.7000 | 57646 |
| 0.8000 | 63375 |
| 0.9000 | 69044 |
| 1.0000 | 74814 |
## Split features by different p-value cutoffs
pval_table <- resultsAdult %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsAdult$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if(outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 6927 |
| 0.0010 | 9524 |
| 0.0100 | 14302 |
| 0.0250 | 17449 |
| 0.0500 | 20691 |
| 0.1000 | 25303 |
| 0.2000 | 31965 |
| 0.3000 | 37735 |
| 0.4000 | 43042 |
| 0.5000 | 48234 |
| 0.6000 | 53677 |
| 0.7000 | 58999 |
| 0.8000 | 64247 |
| 0.9000 | 69543 |
| 1.0000 | 74814 |
## Split features by different p-value cutoffs
pval_table <- resultsChildA %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsChild$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if(outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 7431 |
| 0.0010 | 9930 |
| 0.0100 | 14377 |
| 0.0250 | 17364 |
| 0.0500 | 20509 |
| 0.1000 | 24816 |
| 0.2000 | 31322 |
| 0.3000 | 37005 |
| 0.4000 | 42432 |
| 0.5000 | 47956 |
| 0.6000 | 53422 |
| 0.7000 | 58718 |
| 0.8000 | 64140 |
| 0.9000 | 69434 |
| 1.0000 | 74814 |
## Split features by different p-value cutoffs
pval_table <- resultsAdultA %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsAdult$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if(outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 6601 |
| 0.0010 | 9159 |
| 0.0100 | 13827 |
| 0.0250 | 17042 |
| 0.0500 | 20245 |
| 0.1000 | 24771 |
| 0.2000 | 31484 |
| 0.3000 | 37231 |
| 0.4000 | 42641 |
| 0.5000 | 47958 |
| 0.6000 | 53355 |
| 0.7000 | 58607 |
| 0.8000 | 64084 |
| 0.9000 | 69446 |
| 1.0000 | 74814 |
## Split features by different p-value cutoffs
pval_table <- resultsChildCA %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsChild$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if(outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 38 |
| 0.0010 | 111 |
| 0.0100 | 434 |
| 0.0250 | 833 |
| 0.0500 | 1424 |
| 0.1000 | 2633 |
| 0.2000 | 5669 |
| 0.3000 | 9560 |
| 0.4000 | 14390 |
| 0.5000 | 20686 |
| 0.6000 | 28606 |
| 0.7000 | 37974 |
| 0.8000 | 49060 |
| 0.9000 | 61476 |
| 1.0000 | 74814 |
## Split features by different p-value cutoffs
pval_table <- resultsAdultCA %>%
as.tibble() %>%
filter(!is.na(pvalue)) %>%
mutate(Cut = as.numeric(
as.character(
cut(pvalue, include.lowest = TRUE,
breaks=c(0, 1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
labels = c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1))))) %>%
group_by(Cut) %>%
summarise(nCut=n()) %>%
mutate(Count=cumsum(nCut)) %>%
dplyr::select(c('Cut', 'Count'))
# pval_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
# 0.6, 0.7, 0.8, 0.9, 1), function(x) {
# data.frame('Cut' = x, 'Count' = sum(resultsAdult$pvalue <= x, na.rm = TRUE))
# })
# pval_table <- do.call(rbind, pval_table)
if(outputIsHTML) {
kable(pval_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(pval_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 64 |
| 0.0010 | 153 |
| 0.0100 | 528 |
| 0.0250 | 964 |
| 0.0500 | 1614 |
| 0.1000 | 3003 |
| 0.2000 | 6070 |
| 0.3000 | 9871 |
| 0.4000 | 14834 |
| 0.5000 | 21090 |
| 0.6000 | 28650 |
| 0.7000 | 38085 |
| 0.8000 | 49087 |
| 0.9000 | 60974 |
| 1.0000 | 74814 |
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsChild) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsChild %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,10000))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsChild %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1439 0.5735 0.5129 0.8473 1.0000
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsAdult) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsAdult %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,10000))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsAdult %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1035 0.5225 0.4851 0.8254 0.9999
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsChildA) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsChildA %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,10000))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsChildA %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.08851 0.51096 0.47669 0.81749 0.99993
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsAdultA) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsAdultA %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,10000))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsAdultA %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1047 0.5218 0.4852 0.8238 1.0000
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsChildCA) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsChildCA %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,100))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsChildCA %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.004091 0.999991 0.999991 0.998160 0.999991 0.999995
This plot shows a histogram of the BH adjusted p-values. It might be skewed right or left, or flat as shown in the Wikipedia examples.
## Adjusted p-values histogram plot
hdescription <- elementMetadata(resultsAdultCA) %>%
as.tibble() %>%
filter(grepl('adjusted',description)) %>%
dplyr::select('description') %>%
as.character()
resultsAdultCA %>%
as.tibble() %>%
filter(!is.na(padj)) %>%
ggplot(aes(x = padj)) +
geom_histogram(alpha=.5, position='identity', bins = 50) +
labs(title=paste('Histogram of',hdescription)) +
xlab('Adjusted p-values') +
coord_cartesian(xlim=c(0, 1.0005), ylim=c(0,100))
This is the numerical summary of the distribution of the BH adjusted p-values.
## Adjusted p-values distribution summary
res <- resultsAdultCA %>%
as.tibble() %>%
filter(!is.na(padj))
summary(res$padj)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 1.0000 1.0000 0.9972 1.0000 1.0000
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsChild$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 4393 |
| 0.0010 | 5971 |
| 0.0100 | 8630 |
| 0.0250 | 10432 |
| 0.0500 | 12258 |
| 0.1000 | 14711 |
| 0.2000 | 18678 |
| 0.3000 | 22319 |
| 0.4000 | 25890 |
| 0.5000 | 29779 |
| 0.6000 | 34297 |
| 0.7000 | 39411 |
| 0.8000 | 45655 |
| 0.9000 | 54467 |
| 1.0000 | 66040 |
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsAdult$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 5152 |
| 0.0010 | 6914 |
| 0.0100 | 10059 |
| 0.0250 | 12170 |
| 0.0500 | 14368 |
| 0.1000 | 17382 |
| 0.2000 | 21924 |
| 0.3000 | 26042 |
| 0.4000 | 29932 |
| 0.5000 | 34132 |
| 0.6000 | 38912 |
| 0.7000 | 44176 |
| 0.8000 | 50909 |
| 0.9000 | 59256 |
| 1.0000 | 70367 |
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsChildA$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 5751 |
| 0.0010 | 7529 |
| 0.0100 | 10630 |
| 0.0250 | 12590 |
| 0.0500 | 14570 |
| 0.1000 | 17461 |
| 0.2000 | 21770 |
| 0.3000 | 25566 |
| 0.4000 | 29311 |
| 0.5000 | 33243 |
| 0.6000 | 37775 |
| 0.7000 | 43146 |
| 0.8000 | 49387 |
| 0.9000 | 57068 |
| 1.0000 | 67474 |
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsAdultA$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 4831 |
| 0.0010 | 6563 |
| 0.0100 | 9679 |
| 0.0250 | 11667 |
| 0.0500 | 13832 |
| 0.1000 | 16954 |
| 0.2000 | 21413 |
| 0.3000 | 25321 |
| 0.4000 | 29354 |
| 0.5000 | 33465 |
| 0.6000 | 38211 |
| 0.7000 | 43514 |
| 0.8000 | 49957 |
| 0.9000 | 57862 |
| 1.0000 | 68905 |
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsChildCA$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 0 |
| 0.0010 | 0 |
| 0.0100 | 3 |
| 0.0250 | 11 |
| 0.0500 | 21 |
| 0.1000 | 29 |
| 0.2000 | 47 |
| 0.3000 | 62 |
| 0.4000 | 73 |
| 0.5000 | 98 |
| 0.6000 | 114 |
| 0.7000 | 150 |
| 0.8000 | 182 |
| 0.9000 | 209 |
| 1.0000 | 59149 |
This table shows the number of features with BH adjusted p-values less or equal than some commonly used cutoff values.
## Split features by different adjusted p-value cutoffs
padj_table <- lapply(c(1e-04, 0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
0.6, 0.7, 0.8, 0.9, 1), function(x) {
data.frame('Cut' = x, 'Count' = sum(resultsAdultCA$padj <= x, na.rm = TRUE))
})
padj_table <- do.call(rbind, padj_table)
if(outputIsHTML) {
kable(padj_table, format = 'markdown', align = c('c', 'c'))
} else {
kable(padj_table)
}
| Cut | Count |
|---|---|
| 0.0001 | 3 |
| 0.0010 | 4 |
| 0.0100 | 18 |
| 0.0250 | 24 |
| 0.0500 | 46 |
| 0.1000 | 63 |
| 0.2000 | 83 |
| 0.3000 | 105 |
| 0.4000 | 144 |
| 0.5000 | 175 |
| 0.6000 | 205 |
| 0.7000 | 239 |
| 0.8000 | 268 |
| 0.9000 | 315 |
| 1.0000 | 63261 |
resultsChild %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifChild[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,100))
resultsAdult %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdult[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
#coord_cartesian(ylim=c(0,100))
resultsChildA %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifChildA[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,100))
resultsAdultA %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdultA[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,100))
r1 <- resultsChildCA %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifChildCA[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id')
r1 %>% ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black")) +
# coord_cartesian(ylim=c(0,10)) +
geom_text_repel(data=filter(r1, sig=='FDR<0.05'), aes(label=symbol))
r1 <- resultsAdultCA %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdultCA[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id')
r1 %>% ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black")) +
# coord_cartesian(ylim=c(0,10)) +
geom_text_repel(data=filter(r1, sig=='FDR<0.05'), aes(label=symbol))
This table shows the 50 most significant genes (out of 9046) differentially expressed between control child samples and fetal samples.
genes_control_fetal_child <- signifChild %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_control_fetal_child %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| SOX11 | NM_003108.3 | -7.340 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| GPC2 | NM_152742.2 | -5.830 | 0 | glypican 2 (GPC2) | VALIDATED | mRNA |
| MEX3A | NM_001093725.1 | -6.220 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| LMNB1 | NM_005573.3 | -5.960 | 0 | lamin B1 (LMNB1), transcript variant 1 | REVIEWED | mRNA |
| KIF11 | NM_004523.3 | -5.030 | 0 | kinesin family member 11 (KIF11) | REVIEWED | mRNA |
| SOX4 | NM_003107.2 | -5.720 | 0 | SRY-box 4 (SOX4) | REVIEWED | mRNA |
| TMSB15A | NM_021992.2 | -7.640 | 0 | thymosin beta 15a (TMSB15A) | VALIDATED | mRNA |
| GPR37L1 | XM_011510158.2 | 6.760 | 0 | PREDICTED: Homo sapiens G protein-coupled receptor 37 like 1 (GPR37L1), transcript variant X1 | MODEL | mRNA |
| LMNB1 | NM_001198557.1 | -6.260 | 0 | lamin B1 (LMNB1), transcript variant 2 | REVIEWED | mRNA |
| IGIP | NM_001007189.1 | 3.810 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| OMG | NM_002544.4 | 7.130 | 0 | oligodendrocyte myelin glycoprotein (OMG) | VALIDATED | mRNA |
| DCHS1 | NM_003737.3 | -3.090 | 0 | dachsous cadherin-related 1 (DCHS1) | REVIEWED | mRNA |
| SMC4 | XM_011512312.2 | -4.610 | 0 | PREDICTED: Homo sapiens structural maintenance of chromosomes 4 (SMC4), transcript variant X2 | MODEL | mRNA |
| CDK1 | NM_001786.4 | -7.430 | 0 | cyclin dependent kinase 1 (CDK1), transcript variant 1 | REVIEWED | mRNA |
| BCL2L2 | NM_001199839.1 | 2.420 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| KNL1 | XM_017022432.1 | -5.810 | 0 | PREDICTED: Homo sapiens cancer susceptibility candidate 5 (CASC5), transcript variant X1 | MODEL | mRNA |
| SPC25 | NM_020675.3 | -4.730 | 0 | SPC25, NDC80 kinetochore complex component (SPC25) | REVIEWED | mRNA |
| PTGDS | NM_000954.5 | 12.700 | 0 | prostaglandin D2 synthase (PTGDS) | REVIEWED | mRNA |
| ZBTB47 | NM_145166.3 | 3.470 | 0 | zinc finger and BTB domain containing 47 (ZBTB47) | VALIDATED | mRNA |
| ASPM | NM_018136.4 | -8.930 | 0 | abnormal spindle microtubule assembly (ASPM), transcript variant 1 | REVIEWED | mRNA |
| GPR37L1 | NM_004767.3 | 7.860 | 0 | G protein-coupled receptor 37 like 1 (GPR37L1) | VALIDATED | mRNA |
| ZBED4 | NM_014838.2 | -2.540 | 0 | zinc finger BED-type containing 4 (ZBED4) | VALIDATED | mRNA |
| PLEKHB1 | NM_001130035.1 | 6.330 | 0 | pleckstrin homology domain containing B1 (PLEKHB1), transcript variant 4 | VALIDATED | mRNA |
| UBL3 | NM_007106.3 | 2.270 | 0 | ubiquitin like 3 (UBL3) | VALIDATED | mRNA |
| S100A1 | NM_006271.1 | 8.070 | 0 | S100 calcium binding protein A1 (S100A1) | REVIEWED | mRNA |
| TPPP | NM_007030.2 | 6.500 | 0 | tubulin polymerization promoting protein (TPPP) | VALIDATED | mRNA |
| CCNB2 | NM_004701.3 | -7.540 | 0 | cyclin B2 (CCNB2) | REVIEWED | mRNA |
| LGI3 | NM_139278.2 | 11.100 | 0 | leucine rich repeat LGI family member 3 (LGI3) | VALIDATED | mRNA |
| HNRNPA0 | NM_006805.3 | -2.150 | 0 | heterogeneous nuclear ribonucleoprotein A0 (HNRNPA0) | REVIEWED | mRNA |
| HMGB3 | NM_005342.3 | -3.370 | 0 | high mobility group box 3 (HMGB3), transcript variant 2 | REVIEWED | mRNA |
| NDC80 | NM_006101.2 | -7.790 | 0 | NDC80, kinetochore complex component (NDC80) | VALIDATED | mRNA |
| LMNB2 | NM_032737.3 | -2.650 | 0 | lamin B2 (LMNB2) | REVIEWED | mRNA |
| NUSAP1 | XM_005254430.4 | -9.560 | 0 | PREDICTED: Homo sapiens nucleolar and spindle associated protein 1 (NUSAP1), transcript variant X5 | MODEL | mRNA |
| RRM2 | NM_001034.3 | -6.310 | 0 | ribonucleotide reductase regulatory subunit M2 (RRM2), transcript variant 2 | REVIEWED | mRNA |
| ZNF365 | NM_014951.2 | 6.400 | 0 | zinc finger protein 365 (ZNF365), transcript variant A | REVIEWED | mRNA |
| PRNP | NM_001080123.2 | 4.120 | 0 | prion protein (PRNP), transcript variant 5 | REVIEWED | mRNA |
| TACC3 | NM_006342.2 | -4.630 | 0 | transforming acidic coiled-coil containing protein 3 (TACC3) | REVIEWED | mRNA |
| LDLRAD4 | NM_001003674.3 | 8.660 | 0 | low density lipoprotein receptor class A domain containing 4 (LDLRAD4), transcript variant c1 | VALIDATED | mRNA |
| H1F0 | NM_005318.3 | -2.740 | 0 | H1 histone family member 0 (H1F0) | REVIEWED | mRNA |
| ENDOD1 | NM_015036.2 | 5.780 | 0 | endonuclease domain containing 1 (ENDOD1) | VALIDATED | mRNA |
| ESCO2 | NM_001017420.2 | -6.640 | 0 | establishment of sister chromatid cohesion N-acetyltransferase 2 (ESCO2) | REVIEWED | mRNA |
| RCOR2 | NM_173587.3 | -4.500 | 0 | REST corepressor 2 (RCOR2) | VALIDATED | mRNA |
| PTTG1 | NM_004219.3 | -5.350 | 0 | pituitary tumor-transforming 1 (PTTG1), transcript variant 2 | REVIEWED | mRNA |
| FOXO3B | NR_026718.1 | -3.080 | 0 | forkhead box O3B pseudogene (FOXO3B) | PROVISIONAL | non-coding RNA |
| GTSE1 | NM_016426.6 | -7.010 | 0 | G2 and S-phase expressed 1 (GTSE1) | REVIEWED | mRNA |
| TF | NM_001063.3 | 9.490 | 0 | transferrin (TF) | REVIEWED | mRNA |
| EOMES | NM_001278183.1 | -11.800 | 0 | eomesodermin (EOMES), transcript variant 3 | REVIEWED | mRNA |
| QSER1 | NM_001076786.2 | -2.810 | 0 | glutamine and serine rich 1 (QSER1) | VALIDATED | mRNA |
| MOBP | NR_003090.2 | 9.730 | 0 | myelin-associated oligodendrocyte basic protein (MOBP), transcript variant 4 | VALIDATED | non-coding RNA |
| HDAC2 | XM_011535788.1 | -2.460 | 0 | PREDICTED: Homo sapiens histone deacetylase 2 (HDAC2), transcript variant X1 | MODEL | mRNA |
This table shows the 50 most significant genes (out of 10052) differentially expressed between control adult samples and fetal samples.
genes_control_fetal_adult <- signifAdult %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_control_fetal_adult %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| SOX11 | NM_003108.3 | -8.090 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| MEX3A | NM_001093725.1 | -6.570 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| SOX4 | NM_003107.2 | -6.720 | 0 | SRY-box 4 (SOX4) | REVIEWED | mRNA |
| GPC2 | NM_152742.2 | -5.070 | 0 | glypican 2 (GPC2) | VALIDATED | mRNA |
| LMNB1 | NM_005573.3 | -6.480 | 0 | lamin B1 (LMNB1), transcript variant 1 | REVIEWED | mRNA |
| KIF11 | NM_004523.3 | -4.940 | 0 | kinesin family member 11 (KIF11) | REVIEWED | mRNA |
| DCHS1 | NM_003737.3 | -3.860 | 0 | dachsous cadherin-related 1 (DCHS1) | REVIEWED | mRNA |
| GPR37L1 | XM_011510158.2 | 6.440 | 0 | PREDICTED: Homo sapiens G protein-coupled receptor 37 like 1 (GPR37L1), transcript variant X1 | MODEL | mRNA |
| IGIP | NM_001007189.1 | 4.090 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| LMNB1 | NM_001198557.1 | -6.670 | 0 | lamin B1 (LMNB1), transcript variant 2 | REVIEWED | mRNA |
| OMG | NM_002544.4 | 7.970 | 0 | oligodendrocyte myelin glycoprotein (OMG) | VALIDATED | mRNA |
| TMSB15A | NM_021992.2 | -8.510 | 0 | thymosin beta 15a (TMSB15A) | VALIDATED | mRNA |
| SMC4 | XM_011512312.2 | -5.010 | 0 | PREDICTED: Homo sapiens structural maintenance of chromosomes 4 (SMC4), transcript variant X2 | MODEL | mRNA |
| HNRNPA0 | NM_006805.3 | -2.570 | 0 | heterogeneous nuclear ribonucleoprotein A0 (HNRNPA0) | REVIEWED | mRNA |
| BCL2L2 | NM_001199839.1 | 2.550 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| HMGB3 | NM_005342.3 | -4.070 | 0 | high mobility group box 3 (HMGB3), transcript variant 2 | REVIEWED | mRNA |
| UBL3 | NM_007106.3 | 2.620 | 0 | ubiquitin like 3 (UBL3) | VALIDATED | mRNA |
| S100A1 | NM_006271.1 | 9.270 | 0 | S100 calcium binding protein A1 (S100A1) | REVIEWED | mRNA |
| ENDOD1 | NM_015036.2 | 7.060 | 0 | endonuclease domain containing 1 (ENDOD1) | VALIDATED | mRNA |
| PTGDS | NM_000954.5 | 13.000 | 0 | prostaglandin D2 synthase (PTGDS) | REVIEWED | mRNA |
| PLEKHB1 | NM_001130035.1 | 6.890 | 0 | pleckstrin homology domain containing B1 (PLEKHB1), transcript variant 4 | VALIDATED | mRNA |
| TPPP | NM_007030.2 | 7.110 | 0 | tubulin polymerization promoting protein (TPPP) | VALIDATED | mRNA |
| CDK1 | NM_001786.4 | -7.230 | 0 | cyclin dependent kinase 1 (CDK1), transcript variant 1 | REVIEWED | mRNA |
| LMNB2 | NM_032737.3 | -2.960 | 0 | lamin B2 (LMNB2) | REVIEWED | mRNA |
| DBN1 | NM_004395.3 | -4.620 | 0 | drebrin 1 (DBN1), transcript variant 1 | REVIEWED | mRNA |
| GPR37L1 | NM_004767.3 | 8.010 | 0 | G protein-coupled receptor 37 like 1 (GPR37L1) | VALIDATED | mRNA |
| ASPM | NM_018136.4 | -8.210 | 0 | abnormal spindle microtubule assembly (ASPM), transcript variant 1 | REVIEWED | mRNA |
| KNL1 | XM_017022432.1 | -5.530 | 0 | PREDICTED: Homo sapiens cancer susceptibility candidate 5 (CASC5), transcript variant X1 | MODEL | mRNA |
| LGI3 | NM_139278.2 | 11.800 | 0 | leucine rich repeat LGI family member 3 (LGI3) | VALIDATED | mRNA |
| TF | NM_001063.3 | 11.000 | 0 | transferrin (TF) | REVIEWED | mRNA |
| ZBTB47 | NM_145166.3 | 3.370 | 0 | zinc finger and BTB domain containing 47 (ZBTB47) | VALIDATED | mRNA |
| LDLRAD4 | NM_001003674.3 | 9.640 | 0 | low density lipoprotein receptor class A domain containing 4 (LDLRAD4), transcript variant c1 | VALIDATED | mRNA |
| MOBP | NR_003090.2 | 11.100 | 0 | myelin-associated oligodendrocyte basic protein (MOBP), transcript variant 4 | VALIDATED | non-coding RNA |
| DESI1 | NM_015704.2 | 1.890 | 0 | desumoylating isopeptidase 1 (DESI1) | VALIDATED | mRNA |
| TACC3 | NM_006342.2 | -5.510 | 0 | transforming acidic coiled-coil containing protein 3 (TACC3) | REVIEWED | mRNA |
| NONO | NM_007363.4 | -2.180 | 0 | non-POU domain containing octamer binding (NONO), transcript variant 2 | REVIEWED | mRNA |
| ENPP4 | NM_014936.4 | 5.480 | 0 | ectonucleotide pyrophosphatase/phosphodiesterase 4 (putative) (ENPP4) | VALIDATED | mRNA |
| RRM2 | NM_001034.3 | -7.040 | 0 | ribonucleotide reductase regulatory subunit M2 (RRM2), transcript variant 2 | REVIEWED | mRNA |
| H1F0 | NM_005318.3 | -2.950 | 0 | H1 histone family member 0 (H1F0) | REVIEWED | mRNA |
| ZNF365 | NM_014951.2 | 6.680 | 0 | zinc finger protein 365 (ZNF365), transcript variant A | REVIEWED | mRNA |
| SEC14L5 | NM_014692.1 | 8.960 | 0 | SEC14 like lipid binding 5 (SEC14L5) | VALIDATED | mRNA |
| SPC25 | NM_020675.3 | -3.830 | 0 | SPC25, NDC80 kinetochore complex component (SPC25) | REVIEWED | mRNA |
| PRNP | NM_001080123.2 | 4.250 | 0 | prion protein (PRNP), transcript variant 5 | REVIEWED | mRNA |
| CNP | NM_033133.4 | 6.920 | 0 | 2’,3’-cyclic nucleotide 3’ phosphodiesterase (CNP), transcript variant 1 | VALIDATED | mRNA |
| LRRC3B | NM_001317808.1 | -4.470 | 0 | leucine rich repeat containing 3B (LRRC3B), transcript variant 2 | REVIEWED | mRNA |
| RBM15B | NM_013286.4 | -2.090 | 0 | RNA binding motif protein 15B (RBM15B) | VALIDATED | mRNA |
| MEX3B | NM_032246.4 | -5.070 | 0 | mex-3 RNA binding family member B (MEX3B) | REVIEWED | mRNA |
| HNRNPA1 | NM_002136.3 | -2.510 | 0 | heterogeneous nuclear ribonucleoprotein A1 (HNRNPA1), transcript variant 1 | REVIEWED | mRNA |
| TMEM151A | NM_153266.3 | 7.140 | 0 | transmembrane protein 151A (TMEM151A) | VALIDATED | mRNA |
| EFNB1 | NM_004429.4 | -4.480 | 0 | ephrin B1 (EFNB1) | REVIEWED | mRNA |
This table shows the 50 most significant genes (out of 10335) differentially expressed between autism child samples and fetal samples.
genes_autism_fetal_child <- signifChildA %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_autism_fetal_child %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| SOX11 | NM_003108.3 | -6.800 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| GPC2 | NM_152742.2 | -5.920 | 0 | glypican 2 (GPC2) | VALIDATED | mRNA |
| MEX3A | NM_001093725.1 | -6.190 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| LMNB1 | NM_005573.3 | -6.230 | 0 | lamin B1 (LMNB1), transcript variant 1 | REVIEWED | mRNA |
| KIF11 | NM_004523.3 | -5.560 | 0 | kinesin family member 11 (KIF11) | REVIEWED | mRNA |
| SOX4 | NM_003107.2 | -5.550 | 0 | SRY-box 4 (SOX4) | REVIEWED | mRNA |
| GPR37L1 | XM_011510158.2 | 6.930 | 0 | PREDICTED: Homo sapiens G protein-coupled receptor 37 like 1 (GPR37L1), transcript variant X1 | MODEL | mRNA |
| DCHS1 | NM_003737.3 | -3.590 | 0 | dachsous cadherin-related 1 (DCHS1) | REVIEWED | mRNA |
| TMSB15A | NM_021992.2 | -7.970 | 0 | thymosin beta 15a (TMSB15A) | VALIDATED | mRNA |
| SMC4 | XM_011512312.2 | -5.330 | 0 | PREDICTED: Homo sapiens structural maintenance of chromosomes 4 (SMC4), transcript variant X2 | MODEL | mRNA |
| LMNB1 | NM_001198557.1 | -6.110 | 0 | lamin B1 (LMNB1), transcript variant 2 | REVIEWED | mRNA |
| IGIP | NM_001007189.1 | 3.850 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| OMG | NM_002544.4 | 7.260 | 0 | oligodendrocyte myelin glycoprotein (OMG) | VALIDATED | mRNA |
| NUSAP1 | XM_005254430.4 | -7.850 | 0 | PREDICTED: Homo sapiens nucleolar and spindle associated protein 1 (NUSAP1), transcript variant X5 | MODEL | mRNA |
| BCL2L2 | NM_001199839.1 | 2.450 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| KNL1 | XM_017022432.1 | -6.280 | 0 | PREDICTED: Homo sapiens cancer susceptibility candidate 5 (CASC5), transcript variant X1 | MODEL | mRNA |
| ZBED4 | NM_014838.2 | -2.640 | 0 | zinc finger BED-type containing 4 (ZBED4) | VALIDATED | mRNA |
| CDK1 | NM_001786.4 | -7.540 | 0 | cyclin dependent kinase 1 (CDK1), transcript variant 1 | REVIEWED | mRNA |
| QSER1 | NM_001076786.2 | -3.340 | 0 | glutamine and serine rich 1 (QSER1) | VALIDATED | mRNA |
| ZBTB47 | NM_145166.3 | 3.440 | 0 | zinc finger and BTB domain containing 47 (ZBTB47) | VALIDATED | mRNA |
| SPC25 | NM_020675.3 | -5.140 | 0 | SPC25, NDC80 kinetochore complex component (SPC25) | REVIEWED | mRNA |
| ELAVL2 | NM_001351477.1 | -8.900 | 0 | ELAV like RNA binding protein 2 (ELAVL2), transcript variant 26 | REVIEWED | mRNA |
| FOXO3B | NR_026718.1 | -3.680 | 0 | forkhead box O3B pseudogene (FOXO3B) | PROVISIONAL | non-coding RNA |
| S100A1 | NM_006271.1 | 8.730 | 0 | S100 calcium binding protein A1 (S100A1) | REVIEWED | mRNA |
| GPR37L1 | NM_004767.3 | 7.990 | 0 | G protein-coupled receptor 37 like 1 (GPR37L1) | VALIDATED | mRNA |
| HNRNPA0 | NM_006805.3 | -2.190 | 0 | heterogeneous nuclear ribonucleoprotein A0 (HNRNPA0) | REVIEWED | mRNA |
| PTGDS | NM_000954.5 | 12.300 | 0 | prostaglandin D2 synthase (PTGDS) | REVIEWED | mRNA |
| ASPM | NM_018136.4 | -9.240 | 0 | abnormal spindle microtubule assembly (ASPM), transcript variant 1 | REVIEWED | mRNA |
| PLEKHB1 | NM_001130035.1 | 6.220 | 0 | pleckstrin homology domain containing B1 (PLEKHB1), transcript variant 4 | VALIDATED | mRNA |
| ATP1B1 | NM_001677.3 | 4.440 | 0 | ATPase Na+/K+ transporting subunit beta 1 (ATP1B1) | REVIEWED | mRNA |
| RRM2 | NM_001034.3 | -6.710 | 0 | ribonucleotide reductase regulatory subunit M2 (RRM2), transcript variant 2 | REVIEWED | mRNA |
| HNRNPA1 | NM_002136.3 | -2.530 | 0 | heterogeneous nuclear ribonucleoprotein A1 (HNRNPA1), transcript variant 1 | REVIEWED | mRNA |
| ZNF365 | NM_014951.2 | 6.630 | 0 | zinc finger protein 365 (ZNF365), transcript variant A | REVIEWED | mRNA |
| CCNB2 | NM_004701.3 | -6.730 | 0 | cyclin B2 (CCNB2) | REVIEWED | mRNA |
| PINK1 | NM_032409.2 | 3.110 | 0 | PTEN induced putative kinase 1 (PINK1) | REVIEWED | mRNA |
| PEBP1 | NM_002567.3 | 2.430 | 0 | phosphatidylethanolamine binding protein 1 (PEBP1) | REVIEWED | mRNA |
| TPPP | NM_007030.2 | 6.210 | 0 | tubulin polymerization promoting protein (TPPP) | VALIDATED | mRNA |
| EOMES | NM_001278183.1 | -10.600 | 0 | eomesodermin (EOMES), transcript variant 3 | REVIEWED | mRNA |
| TACC3 | NM_006342.2 | -4.870 | 0 | transforming acidic coiled-coil containing protein 3 (TACC3) | REVIEWED | mRNA |
| PRNP | NM_001080123.2 | 4.100 | 0 | prion protein (PRNP), transcript variant 5 | REVIEWED | mRNA |
| H1F0 | NM_005318.3 | -2.780 | 0 | H1 histone family member 0 (H1F0) | REVIEWED | mRNA |
| LMNB2 | NM_032737.3 | -2.530 | 0 | lamin B2 (LMNB2) | REVIEWED | mRNA |
| SELENOT | NM_016275.4 | 1.940 | 0 | selenoprotein T (SELENOT) | REVIEWED | mRNA |
| NDC80 | NM_006101.2 | -7.550 | 0 | NDC80, kinetochore complex component (NDC80) | VALIDATED | mRNA |
| LGI3 | NM_139278.2 | 11.100 | 0 | leucine rich repeat LGI family member 3 (LGI3) | VALIDATED | mRNA |
| UBL3 | NM_007106.3 | 2.080 | 0 | ubiquitin like 3 (UBL3) | VALIDATED | mRNA |
| DESI1 | NM_015704.2 | 1.740 | 0 | desumoylating isopeptidase 1 (DESI1) | VALIDATED | mRNA |
| FBXO2 | NM_012168.5 | 9.860 | 0 | F-box protein 2 (FBXO2) | REVIEWED | mRNA |
| HCN2 | NM_001194.3 | 7.630 | 0 | hyperpolarization activated cyclic nucleotide gated potassium and sodium channel 2 (HCN2) | REVIEWED | mRNA |
| TF | NM_001063.3 | 9.540 | 0 | transferrin (TF) | REVIEWED | mRNA |
This table shows the 50 most significant genes (out of 9870) differentially expressed between autism adult samples and fetal samples.
genes_autism_fetal_adult <- signifAdultA %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_autism_fetal_adult %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| SOX11 | NM_003108.3 | -7.260 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| GPC2 | NM_152742.2 | -5.410 | 0 | glypican 2 (GPC2) | VALIDATED | mRNA |
| MEX3A | NM_001093725.1 | -6.540 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| SOX4 | NM_003107.2 | -6.700 | 0 | SRY-box 4 (SOX4) | REVIEWED | mRNA |
| LMNB1 | NM_005573.3 | -6.380 | 0 | lamin B1 (LMNB1), transcript variant 1 | REVIEWED | mRNA |
| KIF11 | NM_004523.3 | -5.130 | 0 | kinesin family member 11 (KIF11) | REVIEWED | mRNA |
| IGIP | NM_001007189.1 | 4.080 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| GPR37L1 | XM_011510158.2 | 6.390 | 0 | PREDICTED: Homo sapiens G protein-coupled receptor 37 like 1 (GPR37L1), transcript variant X1 | MODEL | mRNA |
| LMNB1 | NM_001198557.1 | -6.580 | 0 | lamin B1 (LMNB1), transcript variant 2 | REVIEWED | mRNA |
| SMC4 | XM_011512312.2 | -5.260 | 0 | PREDICTED: Homo sapiens structural maintenance of chromosomes 4 (SMC4), transcript variant X2 | MODEL | mRNA |
| DCHS1 | NM_003737.3 | -3.400 | 0 | dachsous cadherin-related 1 (DCHS1) | REVIEWED | mRNA |
| OMG | NM_002544.4 | 7.530 | 0 | oligodendrocyte myelin glycoprotein (OMG) | VALIDATED | mRNA |
| HNRNPA0 | NM_006805.3 | -2.700 | 0 | heterogeneous nuclear ribonucleoprotein A0 (HNRNPA0) | REVIEWED | mRNA |
| HMGB3 | NM_005342.3 | -4.080 | 0 | high mobility group box 3 (HMGB3), transcript variant 2 | REVIEWED | mRNA |
| S100A1 | NM_006271.1 | 8.930 | 0 | S100 calcium binding protein A1 (S100A1) | REVIEWED | mRNA |
| BCL2L2 | NM_001199839.1 | 2.410 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| PTGDS | NM_000954.5 | 12.800 | 0 | prostaglandin D2 synthase (PTGDS) | REVIEWED | mRNA |
| CDK1 | NM_001786.4 | -7.640 | 0 | cyclin dependent kinase 1 (CDK1), transcript variant 1 | REVIEWED | mRNA |
| TPPP | NM_007030.2 | 7.020 | 0 | tubulin polymerization promoting protein (TPPP) | VALIDATED | mRNA |
| TMSB15A | NM_021992.2 | -9.470 | 0 | thymosin beta 15a (TMSB15A) | VALIDATED | mRNA |
| ASPM | NM_018136.4 | -8.280 | 0 | abnormal spindle microtubule assembly (ASPM), transcript variant 1 | REVIEWED | mRNA |
| PLEKHB1 | NM_001130035.1 | 6.690 | 0 | pleckstrin homology domain containing B1 (PLEKHB1), transcript variant 4 | VALIDATED | mRNA |
| KNL1 | XM_017022432.1 | -5.440 | 0 | PREDICTED: Homo sapiens cancer susceptibility candidate 5 (CASC5), transcript variant X1 | MODEL | mRNA |
| H1F0 | NM_005318.3 | -3.110 | 0 | H1 histone family member 0 (H1F0) | REVIEWED | mRNA |
| LMNB2 | NM_032737.3 | -2.830 | 0 | lamin B2 (LMNB2) | REVIEWED | mRNA |
| ZBTB47 | NM_145166.3 | 3.370 | 0 | zinc finger and BTB domain containing 47 (ZBTB47) | VALIDATED | mRNA |
| GPR37L1 | NM_004767.3 | 7.780 | 0 | G protein-coupled receptor 37 like 1 (GPR37L1) | VALIDATED | mRNA |
| LGI3 | NM_139278.2 | 11.600 | 0 | leucine rich repeat LGI family member 3 (LGI3) | VALIDATED | mRNA |
| SPC25 | NM_020675.3 | -4.110 | 0 | SPC25, NDC80 kinetochore complex component (SPC25) | REVIEWED | mRNA |
| ENDOD1 | NM_015036.2 | 6.340 | 0 | endonuclease domain containing 1 (ENDOD1) | VALIDATED | mRNA |
| HNRNPA1 | NM_002136.3 | -2.630 | 0 | heterogeneous nuclear ribonucleoprotein A1 (HNRNPA1), transcript variant 1 | REVIEWED | mRNA |
| UBL3 | NM_007106.3 | 2.250 | 0 | ubiquitin like 3 (UBL3) | VALIDATED | mRNA |
| TF | NM_001063.3 | 10.400 | 0 | transferrin (TF) | REVIEWED | mRNA |
| NONO | NM_007363.4 | -2.120 | 0 | non-POU domain containing octamer binding (NONO), transcript variant 2 | REVIEWED | mRNA |
| TACC3 | NM_006342.2 | -5.050 | 0 | transforming acidic coiled-coil containing protein 3 (TACC3) | REVIEWED | mRNA |
| MOBP | NR_003090.2 | 10.500 | 0 | myelin-associated oligodendrocyte basic protein (MOBP), transcript variant 4 | VALIDATED | non-coding RNA |
| LDLRAD4 | NM_001003674.3 | 8.970 | 0 | low density lipoprotein receptor class A domain containing 4 (LDLRAD4), transcript variant c1 | VALIDATED | mRNA |
| PRNP | NM_001080123.2 | 4.180 | 0 | prion protein (PRNP), transcript variant 5 | REVIEWED | mRNA |
| DESI1 | NM_015704.2 | 1.800 | 0 | desumoylating isopeptidase 1 (DESI1) | VALIDATED | mRNA |
| ZNF365 | NM_014951.2 | 6.350 | 0 | zinc finger protein 365 (ZNF365), transcript variant A | REVIEWED | mRNA |
| PTTG1 | NM_004219.3 | -6.200 | 0 | pituitary tumor-transforming 1 (PTTG1), transcript variant 2 | REVIEWED | mRNA |
| ZBED4 | NM_014838.2 | -2.290 | 0 | zinc finger BED-type containing 4 (ZBED4) | VALIDATED | mRNA |
| DCAF7 | NM_005828.4 | -1.530 | 0 | DDB1 and CUL4 associated factor 7 (DCAF7), transcript variant 1 | REVIEWED | mRNA |
| TMEM151A | NM_153266.3 | 6.940 | 0 | transmembrane protein 151A (TMEM151A) | VALIDATED | mRNA |
| RRM2 | NM_001034.3 | -6.070 | 0 | ribonucleotide reductase regulatory subunit M2 (RRM2), transcript variant 2 | REVIEWED | mRNA |
| MEX3B | NM_032246.4 | -4.780 | 0 | mex-3 RNA binding family member B (MEX3B) | REVIEWED | mRNA |
| SEC14L5 | NM_014692.1 | 8.400 | 0 | SEC14 like lipid binding 5 (SEC14L5) | VALIDATED | mRNA |
| HBG2 | NM_000184.2 | -10.500 | 0 | hemoglobin subunit gamma 2 (HBG2) | REVIEWED | mRNA |
| RBM15B | NM_013286.4 | -1.980 | 0 | RNA binding motif protein 15B (RBM15B) | VALIDATED | mRNA |
| CCNB2 | NM_004701.3 | -5.630 | 0 | cyclin B2 (CCNB2) | REVIEWED | mRNA |
This table shows the 21 most significant genes differentially expressed between control child samples and autism child samples.
genes_control_autism_child <- signifChildCA %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_control_autism_child %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| ZSCAN25 | NM_001350984.1 | 4.94 | 0.0040905 | zinc finger and SCAN domain containing 25 (ZSCAN25), transcript variant 7 | VALIDATED | mRNA |
| PIK3R3 | NM_003629.3 | 6.77 | 0.0041154 | phosphoinositide-3-kinase regulatory subunit 3 (PIK3R3), transcript variant 1 | REVIEWED | mRNA |
| BUB1B | NM_001211.5 | 3.27 | 0.0056204 | BUB1 mitotic checkpoint serine/threonine kinase B (BUB1B) | REVIEWED | mRNA |
| ZFP64 | XM_005260449.1 | -6.20 | 0.0165635 | PREDICTED: Homo sapiens ZFP64 zinc finger protein (ZFP64), transcript variant X6 | MODEL | mRNA |
| TTR | NM_000371.3 | 6.99 | 0.0229338 | transthyretin (TTR) | REVIEWED | mRNA |
| ZNF160 | NM_001322136.1 | -5.58 | 0.0229338 | zinc finger protein 160 (ZNF160), transcript variant 12 | REVIEWED | mRNA |
| EXOC1 | NM_018261.3 | -5.82 | 0.0229338 | exocyst complex component 1 (EXOC1), transcript variant 1 | REVIEWED | mRNA |
| RGL1 | XM_017000756.1 | 5.76 | 0.0229338 | PREDICTED: Homo sapiens ral guanine nucleotide dissociation stimulator like 1 (RGL1), transcript variant X3 | MODEL | mRNA |
| PLPP5 | XM_017013905.1 | 4.74 | 0.0229338 | PREDICTED: Homo sapiens phospholipid phosphatase 5 (PLPP5), transcript variant X10 | MODEL | mRNA |
| TCF4 | XM_017025937.1 | 6.29 | 0.0229338 | PREDICTED: Homo sapiens transcription factor 4 (TCF4), transcript variant X5 | MODEL | mRNA |
| LRRC75A-AS1 | NR_027159.1 | -5.32 | 0.0233528 | LRRC75A antisense RNA 1 (LRRC75A-AS1), transcript variant 3 | PREDICTED | long non-coding RNA |
| RAB11FIP1 | NM_025151.4 | 2.15 | 0.0261247 | RAB11 family interacting protein 1 (RAB11FIP1), transcript variant 1 | VALIDATED | mRNA |
| TAGAP | NM_152133.2 | 5.47 | 0.0261247 | T-cell activation RhoGTPase activating protein (TAGAP), transcript variant 1 | REVIEWED | mRNA |
| YPEL2 | XM_017024621.1 | 5.93 | 0.0261247 | PREDICTED: Homo sapiens yippee like 2 (YPEL2), transcript variant X1 | MODEL | mRNA |
| OSBPL2 | XM_017028170.1 | 5.90 | 0.0261247 | PREDICTED: Homo sapiens oxysterol binding protein like 2 (OSBPL2), transcript variant X8 | MODEL | mRNA |
| SFRP1 | NM_003012.4 | 2.10 | 0.0317043 | secreted frizzled related protein 1 (SFRP1) | REVIEWED | mRNA |
| MCM7 | NM_001278595.1 | -5.36 | 0.0430600 | minichromosome maintenance complex component 7 (MCM7), transcript variant 3 | REVIEWED | mRNA |
| A4GALT | XM_005261647.2 | -6.00 | 0.0490829 | PREDICTED: Homo sapiens alpha 1,4-galactosyltransferase (A4GALT), transcript variant X4 | MODEL | mRNA |
| SLC5A5 | XM_011528194.2 | 4.25 | 0.0490829 | PREDICTED: Homo sapiens solute carrier family 5 member 5 (SLC5A5), transcript variant X4 | MODEL | mRNA |
| LOC105377135 | XM_011529809.2 | -3.62 | 0.0490829 | PREDICTED: Homo sapiens transcription initiation factor TFIID subunit 4-like (LOC105377135), transcript variant X2 | MODEL | mRNA |
| ADAM28 | XM_011544371.2 | 2.25 | 0.0490829 | PREDICTED: Homo sapiens ADAM metallopeptidase domain 28 (ADAM28), transcript variant X14 | MODEL | mRNA |
This table shows the 44 most significant genes differentially expressed between control adult samples and autism adult samples.
genes_control_autism_adult <- signifAdultCA %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type')) %>%
head(n = 50)
genes_control_autism_adult %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| FHOD3 | XM_005258355.1 | -7.62 | 0.0000000 | PREDICTED: Homo sapiens formin homology 2 domain containing 3 (FHOD3), transcript variant X13 | MODEL | mRNA |
| LOC107986119 | XR_001750192.1 | 7.26 | 0.0000019 | PREDICTED: Homo sapiens uncharacterized LOC107986119 (LOC107986119), transcript variant X1 | MODEL | ncRNA |
| DLGAP1 | NM_001242764.1 | -6.86 | 0.0000096 | DLG associated protein 1 (DLGAP1), transcript variant 6 | VALIDATED | mRNA |
| AFF2 | NM_001170628.1 | -5.07 | 0.0002494 | AF4/FMR2 family member 2 (AFF2), transcript variant 6 | REVIEWED | mRNA |
| CSE1L | NR_045796.1 | -6.60 | 0.0012660 | chromosome segregation 1 like (CSE1L), transcript variant 3 | REVIEWED | non-coding RNA |
| PRKCE | XM_006712050.3 | -6.74 | 0.0012660 | PREDICTED: Homo sapiens protein kinase C epsilon (PRKCE), transcript variant X17 | MODEL | mRNA |
| LOC107985374 | XR_001735766.1 | 6.06 | 0.0012660 | PREDICTED: Homo sapiens uncharacterized LOC107985374 (LOC107985374) | MODEL | ncRNA |
| ZNF821 | XM_017023411.1 | -5.92 | 0.0019919 | PREDICTED: Homo sapiens zinc finger protein 821 (ZNF821), transcript variant X4 | MODEL | mRNA |
| GRIA1 | XM_017009393.1 | -6.30 | 0.0031578 | PREDICTED: Homo sapiens glutamate ionotropic receptor AMPA type subunit 1 (GRIA1), transcript variant X3 | MODEL | mRNA |
| EIF3C | XM_017023814.1 | 6.78 | 0.0031578 | PREDICTED: Homo sapiens eukaryotic translation initiation factor 3 subunit C (EIF3C), transcript variant X1 | MODEL | mRNA |
| CHD3 | XM_017024066.1 | 6.18 | 0.0031578 | PREDICTED: Homo sapiens chromodomain helicase DNA binding protein 3 (CHD3), transcript variant X11 | MODEL | mRNA |
| SCAF11 | XM_017020221.1 | 6.10 | 0.0039586 | PREDICTED: Homo sapiens SR-related CTD associated factor 11 (SCAF11), transcript variant X8 | MODEL | mRNA |
| ATAD2 | XM_011516996.2 | -2.83 | 0.0040110 | PREDICTED: Homo sapiens ATPase family, AAA domain containing 2 (ATAD2), transcript variant X3 | MODEL | mRNA |
| LSM6 | XR_001741100.1 | 5.61 | 0.0040110 | PREDICTED: Homo sapiens LSM6 homolog, U6 small nuclear RNA and mRNA degradation associated (LSM6), transcript variant X3 | MODEL | misc_RNA |
| PASK | XM_011510835.1 | -6.48 | 0.0047899 | PREDICTED: Homo sapiens PAS domain containing serine/threonine kinase (PASK), transcript variant X14 | MODEL | mRNA |
| CGN | XM_005245365.4 | 7.10 | 0.0082430 | PREDICTED: Homo sapiens cingulin (CGN), transcript variant X1 | MODEL | mRNA |
| WNK2 | XM_011518936.2 | -5.69 | 0.0083023 | PREDICTED: Homo sapiens WNK lysine deficient protein kinase 2 (WNK2), transcript variant X22 | MODEL | mRNA |
| OGT | XM_017029907.1 | -6.15 | 0.0099568 | PREDICTED: Homo sapiens O-linked N-acetylglucosamine (GlcNAc) transferase (OGT), transcript variant X1 | MODEL | mRNA |
| NCAPG2 | NM_017760.6 | 6.04 | 0.0107996 | non-SMC condensin II complex subunit G2 (NCAPG2), transcript variant 1 | REVIEWED | mRNA |
| LIN52 | NM_001024674.2 | 5.67 | 0.0142319 | lin-52 DREAM MuvB core complex component (LIN52) | VALIDATED | mRNA |
| TNFRSF10D | NM_003840.4 | -2.24 | 0.0219061 | TNF receptor superfamily member 10d (TNFRSF10D) | REVIEWED | mRNA |
| STARD8 | XM_011531069.2 | 6.23 | 0.0219061 | PREDICTED: Homo sapiens StAR related lipid transfer domain containing 8 (STARD8), transcript variant X2 | MODEL | mRNA |
| UBA1 | XM_017029780.1 | -5.18 | 0.0219061 | PREDICTED: Homo sapiens ubiquitin like modifier activating enzyme 1 (UBA1), transcript variant X6 | MODEL | mRNA |
| LOC105372462 | XR_919753.2 | -5.10 | 0.0229630 | PREDICTED: Homo sapiens uncharacterized LOC105372462 (LOC105372462), transcript variant X2 | MODEL | ncRNA |
| ACVR1C | NM_001111033.1 | -6.01 | 0.0264439 | activin A receptor type 1C (ACVR1C), transcript variant 4 | VALIDATED | mRNA |
| LOC105369351 | XR_950214.2 | -5.71 | 0.0268767 | PREDICTED: Homo sapiens uncharacterized LOC105369351 (LOC105369351), transcript variant X2 | MODEL | ncRNA |
| SPAST | NM_014946.3 | -4.08 | 0.0313178 | spastin (SPAST), transcript variant 1 | REVIEWED | mRNA |
| CEP170 | XM_011544343.2 | 5.88 | 0.0313178 | PREDICTED: Homo sapiens centrosomal protein 170 (CEP170), transcript variant X23 | MODEL | mRNA |
| TREM2 | XM_006715116.3 | 5.06 | 0.0355194 | PREDICTED: Homo sapiens triggering receptor expressed on myeloid cells 2 (TREM2), transcript variant X1 | MODEL | mRNA |
| SELE | NM_000450.2 | -4.48 | 0.0357277 | selectin E (SELE) | REVIEWED | mRNA |
| GADD45B | NM_015675.3 | -3.43 | 0.0357277 | growth arrest and DNA damage inducible beta (GADD45B) | REVIEWED | mRNA |
| TPX2 | XM_011528697.2 | 4.54 | 0.0381066 | PREDICTED: Homo sapiens TPX2, microtubule nucleation factor (TPX2), transcript variant X1 | MODEL | mRNA |
| PITPNM3 | NM_001165966.1 | -6.13 | 0.0399315 | PITPNM family member 3 (PITPNM3), transcript variant 2 | REVIEWED | mRNA |
| C12orf57 | NM_001301834.1 | -5.14 | 0.0399315 | chromosome 12 open reading frame 57 (C12orf57), transcript variant 2 | REVIEWED | mRNA |
| LINC01896 | XR_935681.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| LINC01896 | XR_952162.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| LINC01896 | XR_952517.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| NFX1 | XR_001746314.1 | -5.34 | 0.0408559 | PREDICTED: Homo sapiens nuclear transcription factor, X-box binding 1 (NFX1), transcript variant X5 | MODEL | misc_RNA |
| FHL2 | NM_001318896.1 | -4.86 | 0.0432861 | four and a half LIM domains 2 (FHL2), transcript variant 8 | REVIEWED | mRNA |
| MIR100HG | NR_137195.1 | -5.62 | 0.0432861 | mir-100-let-7a-2-mir-125b-1 cluster host gene (MIR100HG), transcript variant 22 | REVIEWED | long non-coding RNA |
| KIAA1324 | XM_011541825.1 | -5.33 | 0.0432861 | PREDICTED: Homo sapiens KIAA1324 (KIAA1324), transcript variant X1 | MODEL | mRNA |
| NCKAP5 | XM_017003980.1 | -5.45 | 0.0432861 | PREDICTED: Homo sapiens NCK associated protein 5 (NCKAP5), transcript variant X18 | MODEL | mRNA |
| LOC105370848 | XR_001733412.1 | 6.09 | 0.0432861 | PREDICTED: Homo sapiens uncharacterized LOC105370848 (LOC105370848) | MODEL | ncRNA |
| ANO2 | XM_011520978.2 | -4.42 | 0.0457225 | PREDICTED: Homo sapiens anoctamin 2 (ANO2), transcript variant X5 | MODEL | mRNA |
| LOC101929097 | XM_005259695.2 | -6.17 | 0.0491767 | PREDICTED: Homo sapiens uncharacterized LOC101929097 (LOC101929097) | MODEL | mRNA |
| SOCS3 | NM_003955.4 | -3.89 | 0.0496000 | suppressor of cytokine signaling 3 (SOCS3) | REVIEWED | mRNA |
This table shows the 16 most significant genes differentially expressed between control child samples and autism child samples AND different between control child samples and fetal samples.
genes_dev_control_autism_child <- signifChildCA %>%
filter(target_id %in% devChildCA) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_dev_control_autism_child %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| BUB1B | NM_001211.5 | 3.27 | 0.0056204 | BUB1 mitotic checkpoint serine/threonine kinase B (BUB1B) | REVIEWED | mRNA |
| ZFP64 | XM_005260449.1 | -6.20 | 0.0165635 | PREDICTED: Homo sapiens ZFP64 zinc finger protein (ZFP64), transcript variant X6 | MODEL | mRNA |
| TTR | NM_000371.3 | 6.99 | 0.0229338 | transthyretin (TTR) | REVIEWED | mRNA |
| ZNF160 | NM_001322136.1 | -5.58 | 0.0229338 | zinc finger protein 160 (ZNF160), transcript variant 12 | REVIEWED | mRNA |
| EXOC1 | NM_018261.3 | -5.82 | 0.0229338 | exocyst complex component 1 (EXOC1), transcript variant 1 | REVIEWED | mRNA |
| RGL1 | XM_017000756.1 | 5.76 | 0.0229338 | PREDICTED: Homo sapiens ral guanine nucleotide dissociation stimulator like 1 (RGL1), transcript variant X3 | MODEL | mRNA |
| PLPP5 | XM_017013905.1 | 4.74 | 0.0229338 | PREDICTED: Homo sapiens phospholipid phosphatase 5 (PLPP5), transcript variant X10 | MODEL | mRNA |
| LRRC75A-AS1 | NR_027159.1 | -5.32 | 0.0233528 | LRRC75A antisense RNA 1 (LRRC75A-AS1), transcript variant 3 | PREDICTED | long non-coding RNA |
| RAB11FIP1 | NM_025151.4 | 2.15 | 0.0261247 | RAB11 family interacting protein 1 (RAB11FIP1), transcript variant 1 | VALIDATED | mRNA |
| TAGAP | NM_152133.2 | 5.47 | 0.0261247 | T-cell activation RhoGTPase activating protein (TAGAP), transcript variant 1 | REVIEWED | mRNA |
| SFRP1 | NM_003012.4 | 2.10 | 0.0317043 | secreted frizzled related protein 1 (SFRP1) | REVIEWED | mRNA |
| MCM7 | NM_001278595.1 | -5.36 | 0.0430600 | minichromosome maintenance complex component 7 (MCM7), transcript variant 3 | REVIEWED | mRNA |
| A4GALT | XM_005261647.2 | -6.00 | 0.0490829 | PREDICTED: Homo sapiens alpha 1,4-galactosyltransferase (A4GALT), transcript variant X4 | MODEL | mRNA |
| SLC5A5 | XM_011528194.2 | 4.25 | 0.0490829 | PREDICTED: Homo sapiens solute carrier family 5 member 5 (SLC5A5), transcript variant X4 | MODEL | mRNA |
| LOC105377135 | XM_011529809.2 | -3.62 | 0.0490829 | PREDICTED: Homo sapiens transcription initiation factor TFIID subunit 4-like (LOC105377135), transcript variant X2 | MODEL | mRNA |
| ADAM28 | XM_011544371.2 | 2.25 | 0.0490829 | PREDICTED: Homo sapiens ADAM metallopeptidase domain 28 (ADAM28), transcript variant X14 | MODEL | mRNA |
This table shows the 35 most significant genes differentially expressed between control adult samples and autism adult samples AND different between control adult samples and fetal samples.
genes_dev_control_autism_adult <- signifAdultCA %>%
filter(target_id %in% devAdultCA) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_dev_control_autism_adult %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| FHOD3 | XM_005258355.1 | -7.62 | 0.0000000 | PREDICTED: Homo sapiens formin homology 2 domain containing 3 (FHOD3), transcript variant X13 | MODEL | mRNA |
| LOC107986119 | XR_001750192.1 | 7.26 | 0.0000019 | PREDICTED: Homo sapiens uncharacterized LOC107986119 (LOC107986119), transcript variant X1 | MODEL | ncRNA |
| DLGAP1 | NM_001242764.1 | -6.86 | 0.0000096 | DLG associated protein 1 (DLGAP1), transcript variant 6 | VALIDATED | mRNA |
| AFF2 | NM_001170628.1 | -5.07 | 0.0002494 | AF4/FMR2 family member 2 (AFF2), transcript variant 6 | REVIEWED | mRNA |
| CSE1L | NR_045796.1 | -6.60 | 0.0012660 | chromosome segregation 1 like (CSE1L), transcript variant 3 | REVIEWED | non-coding RNA |
| PRKCE | XM_006712050.3 | -6.74 | 0.0012660 | PREDICTED: Homo sapiens protein kinase C epsilon (PRKCE), transcript variant X17 | MODEL | mRNA |
| LOC107985374 | XR_001735766.1 | 6.06 | 0.0012660 | PREDICTED: Homo sapiens uncharacterized LOC107985374 (LOC107985374) | MODEL | ncRNA |
| ZNF821 | XM_017023411.1 | -5.92 | 0.0019919 | PREDICTED: Homo sapiens zinc finger protein 821 (ZNF821), transcript variant X4 | MODEL | mRNA |
| GRIA1 | XM_017009393.1 | -6.30 | 0.0031578 | PREDICTED: Homo sapiens glutamate ionotropic receptor AMPA type subunit 1 (GRIA1), transcript variant X3 | MODEL | mRNA |
| EIF3C | XM_017023814.1 | 6.78 | 0.0031578 | PREDICTED: Homo sapiens eukaryotic translation initiation factor 3 subunit C (EIF3C), transcript variant X1 | MODEL | mRNA |
| CHD3 | XM_017024066.1 | 6.18 | 0.0031578 | PREDICTED: Homo sapiens chromodomain helicase DNA binding protein 3 (CHD3), transcript variant X11 | MODEL | mRNA |
| SCAF11 | XM_017020221.1 | 6.10 | 0.0039586 | PREDICTED: Homo sapiens SR-related CTD associated factor 11 (SCAF11), transcript variant X8 | MODEL | mRNA |
| ATAD2 | XM_011516996.2 | -2.83 | 0.0040110 | PREDICTED: Homo sapiens ATPase family, AAA domain containing 2 (ATAD2), transcript variant X3 | MODEL | mRNA |
| LSM6 | XR_001741100.1 | 5.61 | 0.0040110 | PREDICTED: Homo sapiens LSM6 homolog, U6 small nuclear RNA and mRNA degradation associated (LSM6), transcript variant X3 | MODEL | misc_RNA |
| PASK | XM_011510835.1 | -6.48 | 0.0047899 | PREDICTED: Homo sapiens PAS domain containing serine/threonine kinase (PASK), transcript variant X14 | MODEL | mRNA |
| CGN | XM_005245365.4 | 7.10 | 0.0082430 | PREDICTED: Homo sapiens cingulin (CGN), transcript variant X1 | MODEL | mRNA |
| WNK2 | XM_011518936.2 | -5.69 | 0.0083023 | PREDICTED: Homo sapiens WNK lysine deficient protein kinase 2 (WNK2), transcript variant X22 | MODEL | mRNA |
| OGT | XM_017029907.1 | -6.15 | 0.0099568 | PREDICTED: Homo sapiens O-linked N-acetylglucosamine (GlcNAc) transferase (OGT), transcript variant X1 | MODEL | mRNA |
| TNFRSF10D | NM_003840.4 | -2.24 | 0.0219061 | TNF receptor superfamily member 10d (TNFRSF10D) | REVIEWED | mRNA |
| UBA1 | XM_017029780.1 | -5.18 | 0.0219061 | PREDICTED: Homo sapiens ubiquitin like modifier activating enzyme 1 (UBA1), transcript variant X6 | MODEL | mRNA |
| LOC105372462 | XR_919753.2 | -5.10 | 0.0229630 | PREDICTED: Homo sapiens uncharacterized LOC105372462 (LOC105372462), transcript variant X2 | MODEL | ncRNA |
| ACVR1C | NM_001111033.1 | -6.01 | 0.0264439 | activin A receptor type 1C (ACVR1C), transcript variant 4 | VALIDATED | mRNA |
| SPAST | NM_014946.3 | -4.08 | 0.0313178 | spastin (SPAST), transcript variant 1 | REVIEWED | mRNA |
| TREM2 | XM_006715116.3 | 5.06 | 0.0355194 | PREDICTED: Homo sapiens triggering receptor expressed on myeloid cells 2 (TREM2), transcript variant X1 | MODEL | mRNA |
| GADD45B | NM_015675.3 | -3.43 | 0.0357277 | growth arrest and DNA damage inducible beta (GADD45B) | REVIEWED | mRNA |
| TPX2 | XM_011528697.2 | 4.54 | 0.0381066 | PREDICTED: Homo sapiens TPX2, microtubule nucleation factor (TPX2), transcript variant X1 | MODEL | mRNA |
| PITPNM3 | NM_001165966.1 | -6.13 | 0.0399315 | PITPNM family member 3 (PITPNM3), transcript variant 2 | REVIEWED | mRNA |
| C12orf57 | NM_001301834.1 | -5.14 | 0.0399315 | chromosome 12 open reading frame 57 (C12orf57), transcript variant 2 | REVIEWED | mRNA |
| LINC01896 | XR_935681.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| LINC01896 | XR_952162.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| LINC01896 | XR_952517.2 | -5.38 | 0.0399315 | PREDICTED: Homo sapiens uncharacterized LOC645321 (LOC645321), transcript variant X3 | MODEL | ncRNA |
| NFX1 | XR_001746314.1 | -5.34 | 0.0408559 | PREDICTED: Homo sapiens nuclear transcription factor, X-box binding 1 (NFX1), transcript variant X5 | MODEL | misc_RNA |
| FHL2 | NM_001318896.1 | -4.86 | 0.0432861 | four and a half LIM domains 2 (FHL2), transcript variant 8 | REVIEWED | mRNA |
| MIR100HG | NR_137195.1 | -5.62 | 0.0432861 | mir-100-let-7a-2-mir-125b-1 cluster host gene (MIR100HG), transcript variant 22 | REVIEWED | long non-coding RNA |
| KIAA1324 | XM_011541825.1 | -5.33 | 0.0432861 | PREDICTED: Homo sapiens KIAA1324 (KIAA1324), transcript variant X1 | MODEL | mRNA |
| NCKAP5 | XM_017003980.1 | -5.45 | 0.0432861 | PREDICTED: Homo sapiens NCK associated protein 5 (NCKAP5), transcript variant X18 | MODEL | mRNA |
| ANO2 | XM_011520978.2 | -4.42 | 0.0457225 | PREDICTED: Homo sapiens anoctamin 2 (ANO2), transcript variant X5 | MODEL | mRNA |
The list of “developmental” genes that are differentially expressed in Autism vs Control are saved in DESeq2_glist_child.txt and DESeq2_glist_adult.txt (Entrez GeneIDs), in DESeq2_slist_child.txt and DESeq2_slist_adult.txt (gene symbols) while the list of “developmental” transcripts are saved in DESeq2_tlist_child.txt and DESeq2_tlist_adult.txt.
unlink('DESeq2_tlist_child.txt')
unlink('DESeq2_tlist_adult.txt')
unlink('DESeq2_glist_child.txt')
unlink('DESeq2_glist_adult.txt')
unlink('DESeq2_slist_child.txt')
unlink('DESeq2_slist_adult.txt')
lapply(devChildCA[order(devChildCA)], write, 'DESeq2_tlist_child.txt', append=TRUE)
lapply(devAdultCA[order(devAdultCA)], write, 'DESeq2_tlist_adult.txt', append=TRUE)
lapply(devChildCAg[order(devChildCAg)], write, 'DESeq2_glist_child.txt', append=TRUE)
lapply(devAdultCAg[order(devAdultCAg)], write, 'DESeq2_glist_adult.txt', append=TRUE)
lapply(devChildCAs[order(devChildCAs)], write, 'DESeq2_slist_child.txt', append=TRUE)
lapply(devAdultCAs[order(devAdultCAs)], write, 'DESeq2_slist_adult.txt', append=TRUE)
save(file='DESeq2_tlist.RData', devChildCA, devAdultCA)
save(file='DESeq2_glist.RData', devChildCAg, devAdultCAg, devChildCAs, devAdultCAs)
devChildCAup <- intersect((signifChild %>% filter(log2FoldChange > 0))[['target_id']], signifChildCA$target_id)
for (i in devChildCAup) {
obs1 <- assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id == i) %>%
gather(colnames(assay(rld)), key = 'sample', value = 'est_counts') %>%
left_join(metadata1, by = 'sample') %>%
left_join(ttg, by = 'target_id')
symbol <- obs1$symbol[1]
cat('####', symbol, '\n\n')
g <- ggplot(obs1, aes(x=grp, y=exp(est_counts), color=grp)) +
geom_point(shape = 1, size = 3) +
geom_boxplot() +
labs(title = paste0(obs1$symbol[1],' (',obs1$target_id[1],')'),
subtitle = str_c(str_wrap(obs1$description[1], width=55), collapse="\n"),
x = 'group',
y = 'estimated counts',
fill = "Group") +
theme(legend.position = c(0.9, 0.85),
legend.title = element_text(face = "bold.italic"),
legend.box.background = element_rect(colour = "black", fill = alpha('white', alpha = 0.6)),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.4),
plot.subtitle = element_text(hjust = 0.4)
)
print(g)
cat('\n\n')
}
devChildCAdown <- intersect((signifChild %>% filter(log2FoldChange < 0))[['target_id']], signifChildCA$target_id)
for (x in devChildCAdown) {
obs1 <- assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id == x) %>%
gather(colnames(assay(rld)), key = 'sample', value = 'est_counts') %>%
left_join(metadata1, by = 'sample') %>%
left_join(ttg, by = 'target_id')
symbol <- obs1$symbol[1]
cat('####', symbol, '\n\n')
g <- ggplot(obs1, aes(x=grp, y=exp(est_counts), color=grp)) +
geom_point(shape = 1, size = 3) +
geom_boxplot() +
labs(title=paste0(obs1$symbol[1],' (',obs1$target_id[1],')'),
subtitle=str_c(str_wrap(obs1$description[1], width=55), collapse="\n"),
x = 'group',
y = 'estimated counts',
fill = "Group") +
scale_fill_discrete(name = "Group") +
theme(legend.position = c(0.9, 0.85),
legend.title = element_text(face = "bold.italic"),
legend.box.background = element_rect(colour = "black", fill = alpha('white', alpha = 0.6)),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.4),
plot.subtitle = element_text(hjust = 0.4)
)
print(g)
cat('\n\n')
}
Perform GO analysis using the goseq package, which normalizes GO term counts by transcript length.
refseq.lengths <- read_tsv('/share/db/refseq/human/refseq.108.rna.lengths', col_names = c('target_id', 'length'))
ttg <- ttg %>% full_join(refseq.lengths, by = 'target_id')
gene_lengths <- ttg %>%
dplyr::select(gene_id,length) %>%
filter(!is.na(gene_id)) %>%
group_by(gene_id) %>%
summarise(median_length = median(length)) %>%
dplyr::select(gene_id, median_length) %>%
spread(gene_id, median_length) %>%
as.list() %>%
unlist()
genes <- rep(0, length(unique(na.omit(ttg$gene_id))))
names(genes) <- na.omit(unique(ttg$gene_id))
genes[as.character(unique(signifChildCA$gene_id))] <- 1
pwf <- nullp(DEgenes = genes, genome = NULL, id = NULL, bias.data = gene_lengths)
GO.wall <- goseq(pwf, "hg19", "knownGene")
t1 <- GO.wall %>% filter(over_represented_pvalue < 0.05) %>% group_by(ontology) %>% summarise(n=n())
t2 <- GO.wall %>% filter(ontology == 'BP' & over_represented_pvalue < 0.05 & grepl('(neur[oa])|nerv', term)) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
t3 <- GO.wall %>% filter(ontology == 'MF' & over_represented_pvalue < 0.05) %>% head(n=50) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
t4 <- GO.wall %>% filter(ontology == 'CC' & over_represented_pvalue < 0.05) %>% head(n=50) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
Number of significant categories in different ontologies
kable(t1, format = 'markdown')
| ontology | n |
|---|---|
| BP | 190 |
| CC | 15 |
| MF | 35 |
Summary of biological process terms related to neuronal development
kable(t2, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0014034 | neural crest cell fate commitment | 1 | 3 | 0.0033456 |
| GO:1904956 | regulation of midbrain dopaminergic neuron differentiation | 1 | 6 | 0.0064146 |
| GO:1904338 | regulation of dopaminergic neuron differentiation | 1 | 11 | 0.0119593 |
| GO:0014029 | neural crest formation | 1 | 12 | 0.0132207 |
| GO:0090179 | planar cell polarity pathway involved in neural tube closure | 1 | 13 | 0.0142758 |
| GO:0090178 | regulation of establishment of planar polarity involved in neural tube closure | 1 | 14 | 0.0152185 |
| GO:0090177 | establishment of planar polarity involved in neural tube closure | 1 | 15 | 0.0162200 |
| GO:1904948 | midbrain dopaminergic neuron differentiation | 1 | 19 | 0.0206566 |
| GO:0071542 | dopaminergic neuron differentiation | 1 | 39 | 0.0416974 |
Summary of the 50th most significant “molecular functions” terms
kable(t3, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0008507 | sodium:iodide symporter activity | 1 | 1 | 0.0009594 |
| GO:0015373 | monovalent anion:sodium symporter activity | 1 | 1 | 0.0009594 |
| GO:0050512 | lactosylceramide 4-alpha-galactosyltransferase activity | 1 | 1 | 0.0009651 |
| GO:0001011 | transcription factor activity, sequence-specific DNA binding, RNA polymerase recruiting | 1 | 1 | 0.0010870 |
| GO:0001087 | transcription factor activity, TFIIB-class binding | 1 | 1 | 0.0010870 |
| GO:0015111 | iodide transmembrane transporter activity | 1 | 3 | 0.0029095 |
| GO:0001093 | TFIIB-class transcription factor binding | 1 | 3 | 0.0033086 |
| GO:0008321 | Ral guanyl-nucleotide exchange factor activity | 1 | 3 | 0.0033163 |
| GO:0001083 | transcription factor activity, RNA polymerase II basal transcription factor binding | 1 | 5 | 0.0053085 |
| GO:0070324 | thyroid hormone binding | 1 | 6 | 0.0065222 |
| GO:0046935 | 1-phosphatidylinositol-3-kinase regulator activity | 1 | 6 | 0.0065940 |
| GO:0035014 | phosphatidylinositol 3-kinase regulator activity | 1 | 7 | 0.0077892 |
| GO:0015643 | toxic substance binding | 1 | 11 | 0.0116777 |
| GO:0001091 | RNA polymerase II basal transcription factor binding | 1 | 11 | 0.0118417 |
| GO:0008195 | phosphatidate phosphatase activity | 1 | 13 | 0.0146204 |
| GO:0017049 | GTP-Rho binding | 1 | 17 | 0.0173075 |
| GO:0043425 | bHLH transcription factor binding | 1 | 27 | 0.0291582 |
| GO:0015248 | sterol transporter activity | 1 | 28 | 0.0291726 |
| GO:0035250 | UDP-galactosyltransferase activity | 1 | 30 | 0.0320839 |
| GO:0017147 | Wnt-protein binding | 1 | 30 | 0.0323582 |
| GO:0017016 | Ras GTPase binding | 2 | 281 | 0.0361649 |
| GO:0001784 | phosphotyrosine binding | 1 | 35 | 0.0369065 |
| GO:0008378 | galactosyltransferase activity | 1 | 35 | 0.0369817 |
| GO:0004003 | ATP-dependent DNA helicase activity | 1 | 35 | 0.0371952 |
| GO:0001098 | basal transcription machinery binding | 1 | 36 | 0.0381469 |
| GO:0001099 | basal RNA polymerase II transcription machinery binding | 1 | 36 | 0.0381469 |
| GO:0070888 | E-box binding | 1 | 37 | 0.0392161 |
| GO:0005109 | frizzled binding | 1 | 36 | 0.0396567 |
| GO:0031267 | small GTPase binding | 2 | 302 | 0.0411958 |
| GO:0016303 | 1-phosphatidylinositol-3-kinase activity | 1 | 43 | 0.0444385 |
| GO:0015485 | cholesterol binding | 1 | 43 | 0.0448928 |
| GO:0005085 | guanyl-nucleotide exchange factor activity | 2 | 322 | 0.0461261 |
| GO:0045309 | protein phosphorylated amino acid binding | 1 | 44 | 0.0465393 |
| GO:0051020 | GTPase binding | 2 | 330 | 0.0484330 |
| GO:0032934 | sterol binding | 1 | 48 | 0.0498251 |
Summary of the 50th most significant “cellular component” terms
kable(t4, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0044424 | intracellular part | 20 | 13845 | 0.0027052 |
| GO:0098592 | cytoplasmic side of apical plasma membrane | 1 | 3 | 0.0034805 |
| GO:0005622 | intracellular | 20 | 14144 | 0.0041462 |
| GO:0000778 | condensed nuclear chromosome kinetochore | 1 | 9 | 0.0093613 |
| GO:0042555 | MCM complex | 1 | 11 | 0.0118843 |
| GO:0000940 | condensed chromosome outer kinetochore | 1 | 12 | 0.0133876 |
| GO:0044454 | nuclear chromosome part | 3 | 493 | 0.0149696 |
| GO:0000228 | nuclear chromosome | 3 | 527 | 0.0179541 |
| GO:0000780 | condensed nuclear chromosome, centromeric region | 1 | 19 | 0.0197618 |
| GO:0000145 | exocyst | 1 | 19 | 0.0199270 |
| GO:0005942 | phosphatidylinositol 3-kinase complex | 1 | 20 | 0.0209235 |
| GO:0005680 | anaphase-promoting complex | 1 | 21 | 0.0224898 |
| GO:0051233 | spindle midzone | 1 | 28 | 0.0298132 |
| GO:0000152 | nuclear ubiquitin ligase complex | 1 | 38 | 0.0402079 |
| GO:0005829 | cytosol | 9 | 4749 | 0.0468865 |
myttg <- ttg %>% filter(target_id %in% devChildCA) %>% dplyr::select(c('target_id', 'symbol'))
symbols <- myttg$symbol
names(symbols) <- myttg$target_id
sel_vars <- metadata1 %>%
filter(age == 'Child' | grepl('SVZ', sample)) %>%
dplyr::select('sample') %>%
as.list() %>% unlist() %>% as.vector()
obs <-
counts(ddsWald, normalized=TRUE) %>%
# assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% devChildCA) %>%
dplyr::select(target_id, sel_vars) %>%
gather(sel_vars, key = 'sample', value = 'est_counts') %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="sample") %>%
as.matrix()
colnames(obs) <- symbols[colnames(obs)]
labels <- metadata1 %>%
filter(age == 'Child' | grepl('SVZ', sample)) %>%
dplyr::select(c('sample','age')) %>%
arrange(sample) %>%
remove_rownames() %>%
column_to_rownames(var='sample')
Rowv <- obs %>%
dist(method="euclidean") %>%
hclust(method = "complete") %>%
as.dendrogram %>%
# rotate(c(6:7,1:5)) %>%
color_branches(k = 3) %>%
set("branches_lwd", 4)
obs[which(obs == 0)] <- 1
par(mar=c(5,4,4,2)+0.1)
heatmap.2(obs, Rowv = Rowv, trace="none", margins=c(5,9), srtCol = 45,
symm = F, symkey = F, symbreaks = F, scale="none")
myttg <- ttg %>% filter(target_id %in% devAdultCA) %>% dplyr::select(c('target_id', 'symbol'))
symbols <- myttg$symbol
names(symbols) <- myttg$target_id
sel_vars <- metadata1 %>%
filter(age == 'Adult' | grepl('SVZ', sample)) %>%
dplyr::select('sample') %>%
as.list() %>% unlist() %>% as.vector()
obs <-
counts(ddsWald, normalized=TRUE) %>%
# assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% devAdultCA) %>%
dplyr::select(target_id, sel_vars) %>%
gather(sel_vars, key = 'sample', value = 'est_counts') %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="sample") %>%
as.matrix()
colnames(obs) <- symbols[colnames(obs)]
labels <- metadata1 %>%
filter(age == 'Adult' | grepl('SVZ', sample)) %>%
dplyr::select(c('sample','age')) %>%
arrange(sample) %>%
remove_rownames() %>%
column_to_rownames(var='sample')
Rowv <- obs %>%
dist(method="euclidean") %>%
hclust(method = "complete") %>%
as.dendrogram %>%
# rotate(c(6:7,1:5)) %>%
color_branches(k = 3) %>%
set("branches_lwd", 4)
obs[which(obs == 0)] <- 1
par(mar=c(5,4,4,2)+0.1)
heatmap.2(obs, Rowv = Rowv, trace="none", margins=c(5,9), srtCol = 45,
symm = F, symkey = F, symbreaks = F, scale="none")
This correlation plot uses all the transcripts annotated as PROVISIONAL, REVIEWED or VALIDATED in RefSeq (the high confidence categories)
cor_tlist <- ttg %>% filter(status %in% c('PROVISIONAL', 'REVIEWED', 'VALIDATED')) %>% .[['target_id']]
obs <-
counts(ddsWald, normalized = TRUE) %>%
# assay(vst) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% cor_tlist) %>%
gather(colnames(ddsWald), key = 'sample', value = 'est_counts') %>%
full_join(metadata1, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = median(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), all_vars(. >= 1))
colnames(obs) <- metadata1 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
This correlation plot uses the ‘significant’ transcripts, which are different between the control child samples and the fetal samples.
obs <-
counts(ddsWald, normalized = TRUE) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% signifChild$target_id) %>%
gather(colnames(ddsWald), key = 'sample', value = 'est_counts') %>%
full_join(metadata1, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = median(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), all_vars(. >= 1))
colnames(obs) <- metadata1 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
#cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
obs <-
counts(ddsWald, normalized = TRUE) %>%
# assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% (signifChild %>% filter(log2FoldChange > 0))[['target_id']]) %>%
gather(colnames(assay(ddsWald)), key = 'sample', value = 'est_counts') %>%
full_join(metadata1, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = median(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), all_vars(. >= 1))
colnames(obs) <- metadata1 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
#cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
obs <-
counts(ddsWald, normalized=TRUE) %>%
# assay(rld) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% (signifChild %>% filter(log2FoldChange < 0))[['target_id']]) %>%
gather(colnames(assay(ddsWald)), key = 'sample', value = 'est_counts') %>%
full_join(metadata1, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = median(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), all_vars(. >= 1))
colnames(obs) <- metadata1 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
#cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
Select the metadata corresponding to the SVZ tissue samples:
metadata2 <- metadata %>%
filter(grepl('cortex|CP',tissue)) %>%
mutate(condition = factor(condition, levels=c('Control', 'Autism'))) %>%
mutate(grp = factor(paste0(age,condition), levels=c('FetalControl', 'ChildControl', 'ChildAutism', 'AdultControl', 'AdultAutism'))) %>%
arrange(sample) %>%
dplyr::select( c('sample', 'condition', 'library', 'readlength', 'tissue', 'age', 'grp', 'path'))
metadata2
Read the files with the kallisto estimated counts for each of the transcripts in RefSeq version 108 (http://www.ncbi.nlm.nih.gov/genome/annotation_euk/Homo_sapiens/108/).
files <- metadata2 %>% dplyr::select('sample', 'path') %>% spread('sample', 'path') %>% as.list() %>% unlist()
txi.kallisto.cp <- tximport(files, type = "kallisto", txOut = TRUE)
metadata2.df <- metadata2 %>% column_to_rownames(var="sample") %>% as.data.frame()
rownames(txi.kallisto.cp$counts) <- as.character(rownames(txi.kallisto.cp$counts))
grp) with a baseline level corresponding to the Control condition and Fetal age group.dds2 <- DESeqDataSetFromTximport(txi = txi.kallisto.cp, colData = metadata2.df, design = ~grp)
c <- counts(dds2) %>% as.tibble()
c[['target_id']] <- rownames(counts(dds2))
c <- c %>% mutate(sum1 = rowSums(dplyr::select(., starts_with("CP_"))),
sum2 = rowSums(dplyr::select(., starts_with("A_"))),
sum3 = rowSums(dplyr::select(., starts_with("C_"))))
# n1 <- colnames(c) %>% as.tibble() %>% filter(grepl('SVZ', value)) %>% summarise(n()) %>% as.numeric()
# n2 <- colnames(c) %>% as.tibble() %>% filter(grepl('A_', value)) %>% summarise(n()) %>% as.numeric()
# n3 <- colnames(c) %>% as.tibble() %>% filter(grepl('C_', value)) %>% summarise(n()) %>% as.numeric()
c <- c %>% filter((sum1 > 1) & (sum2 > 1) & (sum3 > 1))
dds2 <- dds2[c$target_id,]
rm('c')
ddsWald2 <- DESeq(dds2, test = "Wald", betaPrior = TRUE, parallel = TRUE)
resultsNames(ddsWald2)
## [1] "Intercept" "grpFetalControl" "grpChildAutism" "grpAdultControl"
## [5] "grpAdultAutism"
Extract the results from the model.
# resultsChild2 <- results(ddsWald2, alpha = 0.05,
# contrast = c('grp', 'ChildControl', 'FetalControl'))
resultsAdult2 <- results(ddsWald2, alpha = 0.05,
contrast = c('grp', 'AdultControl', 'FetalControl'))
resultsChildA2 <- results(ddsWald2, alpha = 0.05,
contrast = c('grp', 'ChildAutism', 'FetalControl'))
resultsAdultA2 <- results(ddsWald2, alpha = 0.05,
contrast = c('grp', 'AdultAutism', 'FetalControl'))
# resultsChildCA2 <- results(ddsWald2, alpha = 0.05,
# contrast = c('grp','ChildControl','ChildAutism'))
resultsAdultCA2 <- results(ddsWald2, alpha = 0.05,
contrast = c('grp','AdultControl','AdultAutism'))
# From the DESeq2 vignette:
#
# "Regarding multiple test correction, if a user is planning to contrast all pairs of many levels
# and then selectively reporting the results of only a subset of those pairs, one needs to perform multiple testing across contrasts
# as well as genes to control for this additional form of multiple testing.
# This can be done by using the p.adjust function across a long vector of p values from all pairs of contrasts,
# then re-assigning these adjusted p values to the appropriate results table."
# pChild <- resultsChild %>% as.tibble() %>% rownames_to_column(var='target_id') %>%
# dplyr::select(c('target_id', 'pvalue')) %>% spread('target_id', 'pvalue') %>%
# as.list() %>% unlist()
# pAdult <- resultsAdult %>% as.tibble() %>% rownames_to_column(var='target_id') %>%
# dplyr::select(c('target_id', 'pvalue')) %>% spread('target_id', 'pvalue') %>%
# as.list() %>% unlist()
# signifChild2 <- resultsChild2 %>%
# as.tibble(.) %>% rownames_to_column(var='target_id') %>%
# filter(padj<0.05) %>% arrange(padj) %>%
# left_join(ttg, by = 'target_id') %>%
# dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdult2 <- resultsAdult2 %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifChildA2 <- resultsChildA2 %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdultA2 <- resultsAdultA2 %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
# signifChildCA2 <- resultsChildCA2 %>%
# as.tibble(.) %>% rownames_to_column(var='target_id') %>%
# filter(padj<0.05) %>% arrange(padj) %>%
# left_join(ttg, by = 'target_id') %>%
# dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
signifAdultCA2 <- resultsAdultCA2 %>%
as.tibble(.) %>% rownames_to_column(var='target_id') %>%
filter(padj<0.05) %>% arrange(padj) %>%
left_join(ttg, by = 'target_id') %>%
dplyr::select(c('target_id', 'baseMean', 'log2FoldChange', 'lfcSE', 'stat', 'pvalue', 'padj', 'gene_id', 'symbol', 'description', 'status', 'molecule_type'))
# devChildCA2 <- intersect(signifChild2$target_id, signifChildCA2$target_id)
devAdultCA2 <- intersect(signifAdult2$target_id, signifAdultCA2$target_id)
# devChildCAg2 <- tibble(target_id=devChildCA2) %>% left_join(ttg, by='target_id') %>%
# dplyr::select('gene_id') %>% as.list() %>% unlist() %>% unique()
devAdultCAg2 <- tibble(target_id=devAdultCA2) %>% left_join(ttg, by='target_id') %>%
dplyr::select('gene_id') %>% as.list() %>% unlist() %>% unique()
# devChildCAs2 <- tibble(target_id=devChildCA2) %>% left_join(ttg, by='target_id') %>%
# dplyr::select('symbol') %>% as.list() %>% unlist() %>% unique()
devAdultCAs2 <- tibble(target_id=devAdultCA2) %>% left_join(ttg, by='target_id') %>%
dplyr::select('symbol') %>% as.list() %>% unlist() %>% unique()
Summary of the number of transcripts and genes differentially expressed in control samples of different age groups vs. the fetal samples (“developmental genes”):
rsum_control_fetal2 <- tribble( ~Counts, ~Adult,
'Transcripts',
length(signifAdult2$target_id),
'Genes',
length(unique(signifAdult2$gene_id))
)
kable(rsum_control_fetal2, format = 'markdown')
| Counts | Adult |
|---|---|
| Transcripts | 16686 |
| Genes | 11588 |
Venn diagram:
grid.draw(venn.diagram(list(Adult=signifAdult2$target_id),
filename = NULL,
alpha = 0.5,
fill = c("darkorchid1"),
cat.col = c("darkorchid4")
)
)
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the fetal samples:
rsum_autism_fetal2 <- tribble( ~Counts, ~Child, ~Adult,
'Transcripts',
length(signifChildA2$target_id),
length(signifAdultA2$target_id),
'Genes',
length(unique(signifChildA2$gene_id)),
length(unique(signifAdultA2$gene_id))
)
kable(rsum_autism_fetal2, format = 'markdown')
| Counts | Child | Adult |
|---|---|---|
| Transcripts | 13313 | 15374 |
| Genes | 10014 | 10990 |
Venn diagram:
grid.draw(venn.diagram(list(Child=signifChildA2$target_id, Adult=signifAdultA2$target_id),
filename = NULL,
alpha = 0.5,
fill = c("cornflowerblue", "darkorchid1"),
cat.col = c("darkblue", "darkorchid4")
)
)
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the same age control samples:
rsum_autism_control2 <- tribble( ~Counts, ~Adult,
'Transcripts',
length(signifAdultCA2$target_id),
'Genes',
length(unique(signifAdultCA2$gene_id))
)
kable(rsum_autism_control2, format = 'markdown')
| Counts | Adult |
|---|---|
| Transcripts | 10334 |
| Genes | 7992 |
Venn diagram:
grid.draw(venn.diagram(list(Adult=signifAdultCA2$target_id),
filename = NULL,
alpha = 0.5,
fill = c("darkorchid1"),
cat.col = c("darkorchid4")
)
)
Summary of the number of transcripts and genes differentially expressed in autism samples of different age groups vs. the same age control samples AND also different in the control age group vs. the fetal samples (“developmental”):
rsum_autism_control_fetal2 <- tribble( ~Counts, ~Adult,
'Transcripts',
length(devAdultCA2),
'Genes',
length(devAdultCAg2)
)
kable(rsum_autism_control_fetal2, format = 'markdown')
| Counts | Adult |
|---|---|
| Transcripts | 6182 |
| Genes | 5251 |
Venn diagram:
grid.draw(venn.diagram(list(Adult=devAdultCA2),
filename = NULL,
alpha = 0.5,
fill = c("darkorchid1"),
cat.col = c("darkorchid4")
)
)
## Transform count data
intgroup = "grp"
rld2 <- rlog(ddsWald2, blind = FALSE)
# vst2 <- varianceStabilizingTransformation(ddsWald2, blind=FALSE)
## Perform PCA analysis and get percent of variance explained
data_pca2 <- plotPCA(rld2, intgroup = intgroup, ntop = 1000, returnData = TRUE)
percentVar2 <- round(100 * attr(data_pca, "percentVar"))
## Make plot
data_pca2 %>%
ggplot(aes_string(x = "PC1", y = "PC2", color = "grp")) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar2[1], "% variance")) +
ylab(paste0("PC2: ", percentVar2[2], "% variance")) +
coord_fixed() +
geom_text(aes(label=name), nudge_x = 1, hjust = 0, size = 3, check_overlap = FALSE)
The above plot shows the first two principal components that explain the variability in the data using the regularized log count data. If you are unfamiliar with principal component analysis, you might want to check the Wikipedia entry or this interactive explanation. In this case, the first and second principal component explain 51 and 9 percent of the variance respectively.
## Obtain the sample euclidean distances
sampleDists2 <- dist(t(assay(rld2)))
sampleDistMatrix2 <- as.matrix(sampleDists2)
## Add names based on intgroup
rownames(sampleDistMatrix2) <- rownames(colData(rld2))
colnames(sampleDistMatrix2) <- NULL
## Define colors to use for the heatmap if none were supplied
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
## Make the heatmap
pheatmap(sampleDistMatrix2, clustering_distance_rows = sampleDists2,
clustering_distance_cols = sampleDists2, color = colors)
This plot shows how samples are clustered based on their euclidean distance using the regularized log transformed count data. This figure gives an overview of how the samples are hierarchically clustered. It is a complementary figure to the PCA plot.
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdult2, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsChildA2, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdultA2, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
This section contains the MA plots (see Wikipedia) that compare the mean of the normalized counts against the log fold change. They show one point per feature. The points are shown in red if the feature has an adjusted p-value less than alpha, that is, the statistically significant features are shown in red.
## MA plot with alpha used in DESeq2::results()
plotMA(resultsAdultCA2, alpha = 0.05, main = 'MA plot with alpha = 0.05', ylim = c(-10, 10))
abline(h=c(-1,1),col="dodgerblue",lwd=2)
This plot uses alpha = 0.05, which is the alpha value used to determine which resulting features were significant when running the function DESeq2::results().
resultsAdult2 %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdult2[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
#coord_cartesian(ylim=c(0,100))
resultsChildA2 %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifChildA2[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,100))
resultsAdultA2 %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdultA2[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id') %>%
ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,100))
r1 <- resultsAdultCA2 %>%
as.tibble() %>%
rownames_to_column(var='target_id') %>%
mutate(sig=ifelse(target_id %in% signifAdultCA2[['target_id']], "FDR<0.05", "Not Sig")) %>%
left_join(ttg, by = 'target_id')
r1 %>% ggplot(aes(log2FoldChange, -log10(padj))) +
geom_point(aes(col=sig)) +
scale_color_manual(values=c("red", "black"))
# coord_cartesian(ylim=c(0,10)) +
# geom_text_repel(data=filter(r1, sig=='FDR<0.05'), aes(label=symbol))
This table shows the 50 most significant genes (out of 11588) differentially expressed between control adult samples and fetal samples.
genes_control_fetal_adult2 <- signifAdult2 %>%
head(n = 50) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_control_fetal_adult2 %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| MEX3A | NM_001093725.1 | -7.90 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| ODC1 | NM_001287190.1 | -2.77 | 0 | ornithine decarboxylase 1 (ODC1), transcript variant 4 | REVIEWED | mRNA |
| PABPC1 | NM_002568.3 | -3.37 | 0 | poly(A) binding protein cytoplasmic 1 (PABPC1) | REVIEWED | mRNA |
| SOX11 | NM_003108.3 | -8.73 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| FOXG1 | NM_005249.4 | -3.39 | 0 | forkhead box G1 (FOXG1) | REVIEWED | mRNA |
| SPOCK2 | NM_014767.2 | 6.18 | 0 | SPARC/osteonectin, cwcv and kazal like domains proteoglycan 2 (SPOCK2), transcript variant 2 | REVIEWED | mRNA |
| HDAC2 | XM_011535788.1 | -3.59 | 0 | PREDICTED: Homo sapiens histone deacetylase 2 (HDAC2), transcript variant X1 | MODEL | mRNA |
| GNG2 | XM_011536846.2 | -3.18 | 0 | PREDICTED: Homo sapiens G protein subunit gamma 2 (GNG2), transcript variant X2 | MODEL | mRNA |
| SLA | NM_006748.3 | -6.65 | 0 | Src like adaptor (SLA), transcript variant 3 | VALIDATED | mRNA |
| RBMX | NM_001164803.1 | -2.74 | 0 | RNA binding motif protein, X-linked (RBMX), transcript variant 2 | REVIEWED | mRNA |
| RIMS3 | XM_011542479.2 | 4.03 | 0 | PREDICTED: Homo sapiens regulating synaptic membrane exocytosis 3 (RIMS3), transcript variant X1 | MODEL | mRNA |
| SSBP2 | XM_017009307.1 | -3.71 | 0 | PREDICTED: Homo sapiens single stranded DNA binding protein 2 (SSBP2), transcript variant X5 | MODEL | mRNA |
| SOX4 | NM_003107.2 | -7.40 | 0 | SRY-box 4 (SOX4) | REVIEWED | mRNA |
| EEF1A1 | NM_001402.5 | -3.00 | 0 | eukaryotic translation elongation factor 1 alpha 1 (EEF1A1) | REVIEWED | mRNA |
| WBP2 | NM_012478.3 | 2.40 | 0 | WW domain binding protein 2 (WBP2), transcript variant 1 | REVIEWED | mRNA |
| ALDOC | NM_005165.2 | 6.82 | 0 | aldolase, fructose-bisphosphate C (ALDOC) | REVIEWED | mRNA |
| BACH2 | NM_001170794.1 | -6.52 | 0 | BTB domain and CNC homolog 2 (BACH2), transcript variant 2 | VALIDATED | mRNA |
| RBMX | NM_002139.3 | -2.20 | 0 | RNA binding motif protein, X-linked (RBMX), transcript variant 1 | REVIEWED | mRNA |
| C11orf95 | NM_001144936.1 | -2.98 | 0 | chromosome 11 open reading frame 95 (C11orf95) | PREDICTED | mRNA |
| MLF2 | NM_005439.2 | 1.98 | 0 | myeloid leukemia factor 2 (MLF2), transcript variant 1 | VALIDATED | mRNA |
| DDN | NM_015086.1 | 6.67 | 0 | dendrin (DDN) | VALIDATED | mRNA |
| FAM117B | NM_173511.3 | -3.87 | 0 | family with sequence similarity 117 member B (FAM117B) | VALIDATED | mRNA |
| GNB1 | XM_017001060.1 | -6.84 | 0 | PREDICTED: Homo sapiens G protein subunit beta 1 (GNB1), transcript variant X2 | MODEL | mRNA |
| CBX3 | NM_016587.3 | -2.21 | 0 | chromobox 3 (CBX3), transcript variant 2 | REVIEWED | mRNA |
| ACO2 | NM_001098.2 | 1.99 | 0 | aconitase 2 (ACO2) | REVIEWED | mRNA |
| CAMK2A | NM_171825.2 | 7.47 | 0 | calcium/calmodulin dependent protein kinase II alpha (CAMK2A), transcript variant 2 | REVIEWED | mRNA |
| ITM2B | NM_021999.4 | 1.63 | 0 | integral membrane protein 2B (ITM2B) | REVIEWED | mRNA |
| TOP2B | NM_001068.3 | -2.67 | 0 | topoisomerase (DNA) II beta (TOP2B), transcript variant 2 | REVIEWED | mRNA |
| BCL2L2 | NM_001199839.1 | 3.50 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| JUND | NM_001286968.1 | 2.62 | 0 | JunD proto-oncogene, AP-1 transcription factor subunit (JUND), transcript variant 1 | REVIEWED | mRNA |
| JUND | NM_005354.5 | 2.62 | 0 | JunD proto-oncogene, AP-1 transcription factor subunit (JUND), transcript variant 1 | VALIDATED | mRNA |
| BCL11A | XM_017004333.1 | -5.49 | 0 | PREDICTED: Homo sapiens B-cell CLL/lymphoma 11A (BCL11A), transcript variant X2 | MODEL | mRNA |
| SETBP1 | NM_015559.2 | -3.44 | 0 | SET binding protein 1 (SETBP1), transcript variant 1 | REVIEWED | mRNA |
| IPO7 | NM_006391.2 | -1.90 | 0 | importin 7 (IPO7) | REVIEWED | mRNA |
| COX5B | NM_001862.2 | 3.10 | 0 | cytochrome c oxidase subunit 5B (COX5B) | REVIEWED | mRNA |
| BCL7A | NM_001024808.2 | -2.19 | 0 | BCL tumor suppressor 7A (BCL7A), transcript variant 2 | REVIEWED | mRNA |
| EEF1G | NM_001404.4 | -2.48 | 0 | eukaryotic translation elongation factor 1 gamma (EEF1G) | REVIEWED | mRNA |
| ALDOA | NM_184041.2 | 4.10 | 0 | aldolase, fructose-bisphosphate A (ALDOA), transcript variant 2 | REVIEWED | mRNA |
| CKB | NM_001823.4 | 2.48 | 0 | creatine kinase B (CKB) | REVIEWED | mRNA |
| MTRNR2L6 | NM_001190487.2 | 3.88 | 0 | MT-RNR2-like 6 (MTRNR2L6) | VALIDATED | mRNA |
| LMNB1 | NM_001198557.1 | -4.70 | 0 | lamin B1 (LMNB1), transcript variant 2 | REVIEWED | mRNA |
| CA11 | NM_001217.4 | 5.56 | 0 | carbonic anhydrase 11 (CA11), transcript variant 1 | REVIEWED | mRNA |
| LINC00461 | NR_024383.1 | -3.80 | 0 | long intergenic non-protein coding RNA 461 (LINC00461), transcript variant 2 | VALIDATED | long non-coding RNA |
| THY1 | NM_001311162.1 | 3.57 | 0 | Thy-1 cell surface antigen (THY1), transcript variant 3 | REVIEWED | mRNA |
| MT3 | NM_005954.2 | 8.33 | 0 | metallothionein 3 (MT3) | VALIDATED | mRNA |
| MTRNR2L1 | NM_001190452.1 | 3.94 | 0 | MT-RNR2-like 1 (MTRNR2L1) | VALIDATED | mRNA |
| TSPYL2 | NM_022117.3 | 5.17 | 0 | TSPY like 2 (TSPYL2) | REVIEWED | mRNA |
| RPLP0 | NM_001002.3 | -1.91 | 0 | ribosomal protein lateral stalk subunit P0 (RPLP0), transcript variant 1 | REVIEWED | mRNA |
| NEAT1 | NR_028272.1 | 5.18 | 0 | nuclear paraspeckle assembly transcript 1 (non-protein coding) (NEAT1), transcript variant MENepsilon | REVIEWED | long non-coding RNA |
| SBK1 | NM_001024401.2 | -3.35 | 0 | SH3 domain binding kinase 1 (SBK1) | VALIDATED | mRNA |
This table shows the 50 most significant genes (out of 10990) differentially expressed between autism adult samples and fetal samples.
genes_autism_fetal_adult2 <- signifAdultA2 %>%
head(n = 50) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_autism_fetal_adult2 %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| MTRNR2L6 | NM_001190487.2 | 7.05 | 0 | MT-RNR2-like 6 (MTRNR2L6) | VALIDATED | mRNA |
| GNG2 | XM_011536846.2 | -4.27 | 0 | PREDICTED: Homo sapiens G protein subunit gamma 2 (GNG2), transcript variant X2 | MODEL | mRNA |
| NEAT1 | NR_028272.1 | 7.60 | 0 | nuclear paraspeckle assembly transcript 1 (non-protein coding) (NEAT1), transcript variant MENepsilon | REVIEWED | long non-coding RNA |
| MTRNR2L1 | NM_001190452.1 | 7.42 | 0 | MT-RNR2-like 1 (MTRNR2L1) | VALIDATED | mRNA |
| MTRNR2L4 | NM_001190476.1 | 7.56 | 0 | MT-RNR2-like 4 (MTRNR2L4) | VALIDATED | mRNA |
| ITM2B | NM_021999.4 | 2.26 | 0 | integral membrane protein 2B (ITM2B) | REVIEWED | mRNA |
| FOXG1 | NM_005249.4 | -3.82 | 0 | forkhead box G1 (FOXG1) | REVIEWED | mRNA |
| MTRNR2L8 | NM_001190702.1 | 7.37 | 0 | MT-RNR2-like 8 (MTRNR2L8) | VALIDATED | mRNA |
| IGIP | NM_001007189.1 | 5.10 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| SEPT3 | NM_019106.5 | -3.50 | 0 | septin 3 (SEPT3), transcript variant B | REVIEWED | mRNA |
| JPT1 | NM_016185.3 | -4.90 | 0 | Jupiter microtubule associated homolog 1 (JPT1), transcript variant 1 | VALIDATED | mRNA |
| MBP | NM_001025092.1 | 15.60 | 0 | myelin basic protein (MBP), transcript variant 4 | REVIEWED | mRNA |
| ALDOC | NM_005165.2 | 7.42 | 0 | aldolase, fructose-bisphosphate C (ALDOC) | REVIEWED | mRNA |
| TUBB | NM_178014.3 | -4.55 | 0 | tubulin beta class I (TUBB), transcript variant 2 | REVIEWED | mRNA |
| JUND | NM_001286968.1 | 2.95 | 0 | JunD proto-oncogene, AP-1 transcription factor subunit (JUND), transcript variant 1 | REVIEWED | mRNA |
| JUND | NM_005354.5 | 2.95 | 0 | JunD proto-oncogene, AP-1 transcription factor subunit (JUND), transcript variant 1 | VALIDATED | mRNA |
| SOX11 | NM_003108.3 | -6.06 | 0 | SRY-box 11 (SOX11) | REVIEWED | mRNA |
| B2M | NM_004048.2 | 5.43 | 0 | beta-2-microglobulin (B2M) | REVIEWED | mRNA |
| MT3 | NM_005954.2 | 8.95 | 0 | metallothionein 3 (MT3) | VALIDATED | mRNA |
| BCL2L2 | NM_001199839.1 | 3.81 | 0 | BCL2 like 2 (BCL2L2), transcript variant 2 | REVIEWED | mRNA |
| ZBTB47 | NM_145166.3 | 4.00 | 0 | zinc finger and BTB domain containing 47 (ZBTB47) | VALIDATED | mRNA |
| SPOCK2 | NM_014767.2 | 4.48 | 0 | SPARC/osteonectin, cwcv and kazal like domains proteoglycan 2 (SPOCK2), transcript variant 2 | REVIEWED | mRNA |
| MTRNR2L2 | NM_001190470.1 | 7.18 | 0 | MT-RNR2-like 2 (MTRNR2L2) | VALIDATED | mRNA |
| C11orf95 | NM_001144936.1 | -3.77 | 0 | chromosome 11 open reading frame 95 (C11orf95) | PREDICTED | mRNA |
| PLP1 | NM_001305004.1 | 13.10 | 0 | proteolipid protein 1 (PLP1), transcript variant 4 | REVIEWED | mRNA |
| CRTAP | NM_006371.4 | 4.09 | 0 | cartilage associated protein (CRTAP) | REVIEWED | mRNA |
| CRMP1 | NM_001313.4 | -4.08 | 0 | collapsin response mediator protein 1 (CRMP1), transcript variant 2 | REVIEWED | mRNA |
| PTGDS | NM_000954.5 | 11.30 | 0 | prostaglandin D2 synthase (PTGDS) | REVIEWED | mRNA |
| CELSR3 | NM_001407.2 | -3.56 | 0 | cadherin EGF LAG seven-pass G-type receptor 3 (CELSR3) | REVIEWED | mRNA |
| SLC6A8 | NM_001142806.1 | 2.82 | 0 | solute carrier family 6 member 8 (SLC6A8), transcript variant 3 | REVIEWED | mRNA |
| NEUROD6 | NM_022728.3 | -6.98 | 0 | neuronal differentiation 6 (NEUROD6) | REVIEWED | mRNA |
| MTRNR2L10 | NM_001190708.1 | 6.47 | 0 | MT-RNR2-like 10 (MTRNR2L10) | VALIDATED | mRNA |
| BCL7A | NM_001024808.2 | -2.93 | 0 | BCL tumor suppressor 7A (BCL7A), transcript variant 2 | REVIEWED | mRNA |
| PLEKHB1 | NM_001130035.1 | 10.90 | 0 | pleckstrin homology domain containing B1 (PLEKHB1), transcript variant 4 | VALIDATED | mRNA |
| FAM102A | NM_203305.2 | 6.21 | 0 | family with sequence similarity 102 member A (FAM102A), transcript variant 2 | VALIDATED | mRNA |
| SPTLC3 | XM_011529280.2 | 7.71 | 0 | PREDICTED: Homo sapiens serine palmitoyltransferase long chain base subunit 3 (SPTLC3), transcript variant X2 | MODEL | mRNA |
| SBK1 | NM_001024401.2 | -4.28 | 0 | SH3 domain binding kinase 1 (SBK1) | VALIDATED | mRNA |
| SPARCL1 | NM_004684.5 | 11.40 | 0 | SPARC like 1 (SPARCL1), transcript variant 2 | VALIDATED | mRNA |
| HDAC2 | XM_011535788.1 | -2.91 | 0 | PREDICTED: Homo sapiens histone deacetylase 2 (HDAC2), transcript variant X1 | MODEL | mRNA |
| MOBP | NR_003090.2 | 12.80 | 0 | myelin-associated oligodendrocyte basic protein (MOBP), transcript variant 4 | VALIDATED | non-coding RNA |
| BCYRN1 | NR_001568.1 | 12.60 | 0 | brain cytoplasmic RNA 1 (BCYRN1) | REVIEWED | long non-coding RNA |
| SHISA4 | NR_030775.1 | 4.61 | 0 | shisa family member 4 (SHISA4), transcript variant 2 | VALIDATED | non-coding RNA |
| FAM117B | NM_173511.3 | -4.40 | 0 | family with sequence similarity 117 member B (FAM117B) | VALIDATED | mRNA |
| MARCH4 | NM_020814.2 | -4.83 | 0 | membrane associated ring-CH-type finger 4 (MARCH4) | VALIDATED | mRNA |
| CERCAM | XM_011518763.2 | 6.84 | 0 | PREDICTED: Homo sapiens cerebral endothelial cell adhesion molecule (CERCAM), transcript variant X4 | MODEL | mRNA |
| MEX3A | NM_001093725.1 | -5.85 | 0 | mex-3 RNA binding family member A (MEX3A) | VALIDATED | mRNA |
| SSBP2 | XM_017009307.1 | -3.46 | 0 | PREDICTED: Homo sapiens single stranded DNA binding protein 2 (SSBP2), transcript variant X5 | MODEL | mRNA |
| HSPB1 | NM_001540.3 | 6.07 | 0 | heat shock protein family B (small) member 1 (HSPB1) | REVIEWED | mRNA |
| ODC1 | NM_001287190.1 | -2.21 | 0 | ornithine decarboxylase 1 (ODC1), transcript variant 4 | REVIEWED | mRNA |
| EEF1G | NM_001404.4 | -2.48 | 0 | eukaryotic translation elongation factor 1 gamma (EEF1G) | REVIEWED | mRNA |
This table shows the 50 most significant genes (out of 7992) differentially expressed between control adult samples and autism adult samples.
genes_control_autism_adult2 <- signifAdultCA2 %>%
head(n = 50) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_control_autism_adult2 %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| EFHD2 | NM_024329.5 | 2.190 | 0 | EF-hand domain family member D2 (EFHD2) | VALIDATED | mRNA |
| NEAT1 | NR_028272.1 | -2.420 | 0 | nuclear paraspeckle assembly transcript 1 (non-protein coding) (NEAT1), transcript variant MENepsilon | REVIEWED | long non-coding RNA |
| LPAR1 | NM_001351403.1 | -4.210 | 0 | lysophosphatidic acid receptor 1 (LPAR1), transcript variant 9 | REVIEWED | mRNA |
| STMN3 | NM_015894.3 | 4.460 | 0 | stathmin 3 (STMN3), transcript variant 1 | REVIEWED | mRNA |
| RIMS3 | XM_011542479.2 | 3.070 | 0 | PREDICTED: Homo sapiens regulating synaptic membrane exocytosis 3 (RIMS3), transcript variant X1 | MODEL | mRNA |
| JPH4 | NM_001146028.1 | 2.730 | 0 | junctophilin 4 (JPH4), transcript variant 2 | REVIEWED | mRNA |
| RAB3A | NM_002866.4 | 4.110 | 0 | RAB3A, member RAS oncogene family (RAB3A) | VALIDATED | mRNA |
| SHTN1 | XM_017016462.1 | -2.890 | 0 | PREDICTED: Homo sapiens shootin 1 (SHTN1), transcript variant X3 | MODEL | mRNA |
| CLSTN3 | NM_014718.3 | 4.560 | 0 | calsyntenin 3 (CLSTN3) | VALIDATED | mRNA |
| TUBA4A | NM_006000.2 | 3.860 | 0 | tubulin alpha 4a (TUBA4A), transcript variant 1 | REVIEWED | mRNA |
| PHYHIP | NM_014759.3 | 5.890 | 0 | phytanoyl-CoA 2-hydroxylase interacting protein (PHYHIP), transcript variant 2 | VALIDATED | mRNA |
| AP2M1 | NM_004068.3 | 2.030 | 0 | adaptor related protein complex 2 mu 1 subunit (AP2M1), transcript variant 1 | REVIEWED | mRNA |
| MTRNR2L6 | NM_001190487.2 | -3.170 | 0 | MT-RNR2-like 6 (MTRNR2L6) | VALIDATED | mRNA |
| MTRNR2L1 | NM_001190452.1 | -3.480 | 0 | MT-RNR2-like 1 (MTRNR2L1) | VALIDATED | mRNA |
| STXBP1 | NM_001032221.3 | 3.160 | 0 | syntaxin binding protein 1 (STXBP1), transcript variant 2 | REVIEWED | mRNA |
| UCHL1 | NM_004181.4 | 3.350 | 0 | ubiquitin C-terminal hydrolase L1 (UCHL1) | REVIEWED | mRNA |
| ARGLU1 | NM_018011.3 | -1.640 | 0 | arginine and glutamate rich 1 (ARGLU1) | VALIDATED | mRNA |
| SYT5 | XM_006723339.3 | 4.280 | 0 | PREDICTED: Homo sapiens synaptotagmin 5 (SYT5), transcript variant X1 | MODEL | mRNA |
| ATP2A2 | XR_243009.2 | 5.920 | 0 | PREDICTED: Homo sapiens ATPase sarcoplasmic/endoplasmic reticulum Ca2+ transporting 2 (ATP2A2), transcript variant X2 | MODEL | misc_RNA |
| RTN1 | NM_206852.2 | 4.140 | 0 | reticulon 1 (RTN1), transcript variant 3 | REVIEWED | mRNA |
| GLS | NM_014905.4 | 4.700 | 0 | glutaminase (GLS), transcript variant 1 | REVIEWED | mRNA |
| YWHAH | NM_003405.3 | 3.230 | 0 | tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein eta (YWHAH) | REVIEWED | mRNA |
| ANKRD34A | NM_001039888.3 | 2.700 | 0 | ankyrin repeat domain 34A (ANKRD34A) | VALIDATED | mRNA |
| SCRT1 | NM_031309.5 | 3.720 | 0 | scratch family transcriptional repressor 1 (SCRT1) | REVIEWED | mRNA |
| PKM | XM_017022313.1 | 10.200 | 0 | PREDICTED: Homo sapiens pyruvate kinase, muscle (PKM), transcript variant X5 | MODEL | mRNA |
| BASP1 | NM_006317.4 | 5.350 | 0 | brain abundant membrane attached signal protein 1 (BASP1), transcript variant 1 | REVIEWED | mRNA |
| MBP | NM_001025092.1 | -3.410 | 0 | myelin basic protein (MBP), transcript variant 4 | REVIEWED | mRNA |
| CALY | NM_015722.3 | 6.410 | 0 | calcyon neuron specific vesicular protein (CALY), transcript variant 1 | VALIDATED | mRNA |
| RNF141 | NM_016422.3 | -1.840 | 0 | ring finger protein 141 (RNF141) | REVIEWED | mRNA |
| KIF5B | XM_017016224.1 | -3.080 | 0 | PREDICTED: Homo sapiens kinesin family member 5B (KIF5B), transcript variant X1 | MODEL | mRNA |
| GNAO1 | XM_011523003.2 | 7.740 | 0 | PREDICTED: Homo sapiens G protein subunit alpha o1 (GNAO1), transcript variant X1 | MODEL | mRNA |
| GOLGA7 | NM_016099.2 | -2.620 | 0 | golgin A7 (GOLGA7), transcript variant 1 | VALIDATED | mRNA |
| IGIP | NM_001007189.1 | -0.953 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| RBMX | NM_001164803.1 | -1.660 | 0 | RNA binding motif protein, X-linked (RBMX), transcript variant 2 | REVIEWED | mRNA |
| PDXP | NM_020315.4 | 2.630 | 0 | pyridoxal phosphatase (PDXP) | VALIDATED | mRNA |
| THY1 | NM_001311162.1 | 2.750 | 0 | Thy-1 cell surface antigen (THY1), transcript variant 3 | REVIEWED | mRNA |
| SYT13 | NM_020826.2 | 3.820 | 0 | synaptotagmin 13 (SYT13), transcript variant 1 | REVIEWED | mRNA |
| SCN3B | NM_001040151.1 | 3.010 | 0 | sodium voltage-gated channel beta subunit 3 (SCN3B), transcript variant 2 | REVIEWED | mRNA |
| NCDN | NM_001014841.1 | 3.290 | 0 | neurochondrin (NCDN), transcript variant 2 | REVIEWED | mRNA |
| TMEM59L | NM_012109.2 | 2.430 | 0 | transmembrane protein 59 like (TMEM59L) | REVIEWED | mRNA |
| SEPT7 | XM_017012862.1 | -1.630 | 0 | PREDICTED: Homo sapiens septin 7 (SEPT7), transcript variant X11 | MODEL | mRNA |
| SYNJ2BP | NM_018373.2 | -1.350 | 0 | synaptojanin 2 binding protein (SYNJ2BP) | VALIDATED | mRNA |
| DOCK1 | XM_017015817.1 | -2.880 | 0 | PREDICTED: Homo sapiens dedicator of cytokinesis 1 (DOCK1), transcript variant X7 | MODEL | mRNA |
| ATP1A3 | NM_001256213.1 | 3.870 | 0 | ATPase Na+/K+ transporting subunit alpha 3 (ATP1A3), transcript variant 2 | REVIEWED | mRNA |
| SPINT2 | NM_021102.3 | 4.400 | 0 | serine peptidase inhibitor, Kunitz type 2 (SPINT2), transcript variant a | REVIEWED | mRNA |
| LOC101930059 | XR_431133.3 | -2.270 | 0 | PREDICTED: Homo sapiens uncharacterized LOC101930059 (LOC101930059), transcript variant X4 | MODEL | ncRNA |
| SEPT3 | NM_019106.5 | 1.770 | 0 | septin 3 (SEPT3), transcript variant B | REVIEWED | mRNA |
| MTRNR2L8 | NM_001190702.1 | -3.560 | 0 | MT-RNR2-like 8 (MTRNR2L8) | VALIDATED | mRNA |
| HPCAL1 | NM_002149.3 | 5.100 | 0 | hippocalcin like 1 (HPCAL1), transcript variant 1 | REVIEWED | mRNA |
| HOOK3 | NM_032410.3 | -1.330 | 0 | hook microtubule tethering protein 3 (HOOK3) | VALIDATED | mRNA |
This table shows the 50 most significant genes (out of 5251) differentially expressed between control adult samples and autism adult samples AND different between control adult samples and fetal samples.
genes_dev_control_autism_adult2 <- signifAdultCA2 %>%
head(n = 50) %>%
filter(target_id %in% devAdultCA2) %>%
mutate(gene = text_spec(symbol, link = paste0('http://www.genecards.org/cgi-bin/carddisp.pl?gene=', symbol)),
log2FoldChange = ifelse(log2FoldChange < 0,
color_tile("red", "white")(signif(log2FoldChange, 3)),
color_tile("white", "darkgreen")(signif(log2FoldChange, 3))),
transcript = target_id) %>%
dplyr::select(c('gene', 'transcript', 'log2FoldChange', 'padj', 'description', 'status', 'molecule_type'))
genes_dev_control_autism_adult2 %>%
kable("html", escape = F) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), font_size = 8, full_width = F)
| gene | transcript | log2FoldChange | padj | description | status | molecule_type |
|---|---|---|---|---|---|---|
| EFHD2 | NM_024329.5 | 2.190 | 0 | EF-hand domain family member D2 (EFHD2) | VALIDATED | mRNA |
| NEAT1 | NR_028272.1 | -2.420 | 0 | nuclear paraspeckle assembly transcript 1 (non-protein coding) (NEAT1), transcript variant MENepsilon | REVIEWED | long non-coding RNA |
| LPAR1 | NM_001351403.1 | -4.210 | 0 | lysophosphatidic acid receptor 1 (LPAR1), transcript variant 9 | REVIEWED | mRNA |
| STMN3 | NM_015894.3 | 4.460 | 0 | stathmin 3 (STMN3), transcript variant 1 | REVIEWED | mRNA |
| RIMS3 | XM_011542479.2 | 3.070 | 0 | PREDICTED: Homo sapiens regulating synaptic membrane exocytosis 3 (RIMS3), transcript variant X1 | MODEL | mRNA |
| JPH4 | NM_001146028.1 | 2.730 | 0 | junctophilin 4 (JPH4), transcript variant 2 | REVIEWED | mRNA |
| RAB3A | NM_002866.4 | 4.110 | 0 | RAB3A, member RAS oncogene family (RAB3A) | VALIDATED | mRNA |
| SHTN1 | XM_017016462.1 | -2.890 | 0 | PREDICTED: Homo sapiens shootin 1 (SHTN1), transcript variant X3 | MODEL | mRNA |
| CLSTN3 | NM_014718.3 | 4.560 | 0 | calsyntenin 3 (CLSTN3) | VALIDATED | mRNA |
| TUBA4A | NM_006000.2 | 3.860 | 0 | tubulin alpha 4a (TUBA4A), transcript variant 1 | REVIEWED | mRNA |
| PHYHIP | NM_014759.3 | 5.890 | 0 | phytanoyl-CoA 2-hydroxylase interacting protein (PHYHIP), transcript variant 2 | VALIDATED | mRNA |
| AP2M1 | NM_004068.3 | 2.030 | 0 | adaptor related protein complex 2 mu 1 subunit (AP2M1), transcript variant 1 | REVIEWED | mRNA |
| MTRNR2L6 | NM_001190487.2 | -3.170 | 0 | MT-RNR2-like 6 (MTRNR2L6) | VALIDATED | mRNA |
| MTRNR2L1 | NM_001190452.1 | -3.480 | 0 | MT-RNR2-like 1 (MTRNR2L1) | VALIDATED | mRNA |
| STXBP1 | NM_001032221.3 | 3.160 | 0 | syntaxin binding protein 1 (STXBP1), transcript variant 2 | REVIEWED | mRNA |
| UCHL1 | NM_004181.4 | 3.350 | 0 | ubiquitin C-terminal hydrolase L1 (UCHL1) | REVIEWED | mRNA |
| ARGLU1 | NM_018011.3 | -1.640 | 0 | arginine and glutamate rich 1 (ARGLU1) | VALIDATED | mRNA |
| SYT5 | XM_006723339.3 | 4.280 | 0 | PREDICTED: Homo sapiens synaptotagmin 5 (SYT5), transcript variant X1 | MODEL | mRNA |
| ATP2A2 | XR_243009.2 | 5.920 | 0 | PREDICTED: Homo sapiens ATPase sarcoplasmic/endoplasmic reticulum Ca2+ transporting 2 (ATP2A2), transcript variant X2 | MODEL | misc_RNA |
| GLS | NM_014905.4 | 4.700 | 0 | glutaminase (GLS), transcript variant 1 | REVIEWED | mRNA |
| YWHAH | NM_003405.3 | 3.230 | 0 | tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein eta (YWHAH) | REVIEWED | mRNA |
| ANKRD34A | NM_001039888.3 | 2.700 | 0 | ankyrin repeat domain 34A (ANKRD34A) | VALIDATED | mRNA |
| SCRT1 | NM_031309.5 | 3.720 | 0 | scratch family transcriptional repressor 1 (SCRT1) | REVIEWED | mRNA |
| PKM | XM_017022313.1 | 10.200 | 0 | PREDICTED: Homo sapiens pyruvate kinase, muscle (PKM), transcript variant X5 | MODEL | mRNA |
| MBP | NM_001025092.1 | -3.410 | 0 | myelin basic protein (MBP), transcript variant 4 | REVIEWED | mRNA |
| CALY | NM_015722.3 | 6.410 | 0 | calcyon neuron specific vesicular protein (CALY), transcript variant 1 | VALIDATED | mRNA |
| KIF5B | XM_017016224.1 | -3.080 | 0 | PREDICTED: Homo sapiens kinesin family member 5B (KIF5B), transcript variant X1 | MODEL | mRNA |
| GOLGA7 | NM_016099.2 | -2.620 | 0 | golgin A7 (GOLGA7), transcript variant 1 | VALIDATED | mRNA |
| IGIP | NM_001007189.1 | -0.953 | 0 | IgA inducing protein (IGIP) | VALIDATED | mRNA |
| RBMX | NM_001164803.1 | -1.660 | 0 | RNA binding motif protein, X-linked (RBMX), transcript variant 2 | REVIEWED | mRNA |
| PDXP | NM_020315.4 | 2.630 | 0 | pyridoxal phosphatase (PDXP) | VALIDATED | mRNA |
| THY1 | NM_001311162.1 | 2.750 | 0 | Thy-1 cell surface antigen (THY1), transcript variant 3 | REVIEWED | mRNA |
| SYT13 | NM_020826.2 | 3.820 | 0 | synaptotagmin 13 (SYT13), transcript variant 1 | REVIEWED | mRNA |
| NCDN | NM_001014841.1 | 3.290 | 0 | neurochondrin (NCDN), transcript variant 2 | REVIEWED | mRNA |
| TMEM59L | NM_012109.2 | 2.430 | 0 | transmembrane protein 59 like (TMEM59L) | REVIEWED | mRNA |
| SEPT7 | XM_017012862.1 | -1.630 | 0 | PREDICTED: Homo sapiens septin 7 (SEPT7), transcript variant X11 | MODEL | mRNA |
| ATP1A3 | NM_001256213.1 | 3.870 | 0 | ATPase Na+/K+ transporting subunit alpha 3 (ATP1A3), transcript variant 2 | REVIEWED | mRNA |
| SPINT2 | NM_021102.3 | 4.400 | 0 | serine peptidase inhibitor, Kunitz type 2 (SPINT2), transcript variant a | REVIEWED | mRNA |
| LOC101930059 | XR_431133.3 | -2.270 | 0 | PREDICTED: Homo sapiens uncharacterized LOC101930059 (LOC101930059), transcript variant X4 | MODEL | ncRNA |
| SEPT3 | NM_019106.5 | 1.770 | 0 | septin 3 (SEPT3), transcript variant B | REVIEWED | mRNA |
| MTRNR2L8 | NM_001190702.1 | -3.560 | 0 | MT-RNR2-like 8 (MTRNR2L8) | VALIDATED | mRNA |
| HPCAL1 | NM_002149.3 | 5.100 | 0 | hippocalcin like 1 (HPCAL1), transcript variant 1 | REVIEWED | mRNA |
| HOOK3 | NM_032410.3 | -1.330 | 0 | hook microtubule tethering protein 3 (HOOK3) | VALIDATED | mRNA |
The list of “developmental” genes that are differentially expressed in Autism vs Control are saved in DESeq2_glist_adult.cortex.txt (Entrez GeneIDs), in DESeq2_slist_adult.cortex.txt (gene symbols) while the list of “developmental” transcripts are saved in DESeq2_tlist_adult.cortex.txt.
unlink('DESeq2_tlist_adult.txt')
unlink('DESeq2_glist_adult.txt')
unlink('DESeq2_slist_adult.txt')
lapply(devAdultCA[order(devAdultCA)], write, 'DESeq2_tlist_adult.cortex.txt', append=TRUE)
lapply(devAdultCAg[order(devAdultCAg)], write, 'DESeq2_glist_adult.cortex.txt', append=TRUE)
lapply(devAdultCAs[order(devAdultCAs)], write, 'DESeq2_slist_adult.cortex.txt', append=TRUE)
save(file='DESeq2_tlist.cortex.RData', devAdultCA2)
save(file='DESeq2_glist.cortex.RData', devAdultCAg, devAdultCAs)
devAdultCAup2 <- intersect((signifAdult2 %>% filter(log2FoldChange > 0))[['target_id']], signifAdultCA2$target_id)
for (i in devAdultCAup2[1:20]) {
obs1 <- assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id == i) %>%
gather(colnames(assay(rld2)), key = 'sample', value = 'est_counts') %>%
left_join(metadata2, by = 'sample') %>%
left_join(ttg, by = 'target_id')
symbol <- obs1$symbol[1]
cat('####', symbol, '\n\n')
g <- ggplot(obs1, aes(x=grp, y=exp(est_counts), color=grp)) +
geom_point(shape = 1, size = 3) +
geom_boxplot() +
labs(title = paste0(obs1$symbol[1],' (',obs1$target_id[1],')'),
subtitle = str_c(str_wrap(obs1$description[1], width=55), collapse="\n"),
x = 'group',
y = 'estimated counts',
fill = "Group") +
theme(legend.position = c(0.9, 0.85),
legend.title = element_text(face = "bold.italic"),
legend.box.background = element_rect(colour = "black", fill = alpha('white', alpha = 0.6)),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.4),
plot.subtitle = element_text(hjust = 0.4)
)
print(g)
cat('\n\n')
}
devAdultCAdown2 <- intersect((signifAdult2 %>% filter(log2FoldChange < 0))[['target_id']], signifAdultCA$target_id)
for (x in devAdultCAdown2) {
obs1 <- assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id == x) %>%
gather(colnames(assay(rld2)), key = 'sample', value = 'est_counts') %>%
left_join(metadata2, by = 'sample') %>%
left_join(ttg, by = 'target_id')
symbol <- obs1$symbol[1]
cat('####', symbol, '\n\n')
g <- ggplot(obs1, aes(x=grp, y=exp(est_counts), color=grp)) +
geom_point(shape = 1, size = 3) +
geom_boxplot() +
labs(title=paste0(obs1$symbol[1],' (',obs1$target_id[1],')'),
subtitle=str_c(str_wrap(obs1$description[1], width=55), collapse="\n"),
x = 'group',
y = 'estimated counts',
fill = "Group") +
scale_fill_discrete(name = "Group") +
theme(legend.position = c(0.9, 0.85),
legend.title = element_text(face = "bold.italic"),
legend.box.background = element_rect(colour = "black", fill = alpha('white', alpha = 0.6)),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.4),
plot.subtitle = element_text(hjust = 0.4)
)
print(g)
cat('\n\n')
}
Perform GO analysis using the goseq package, which normalizes GO term counts by transcript length.
ttgfile <- '/share/db/refseq/human/refseq.108.rna.t2g.txt'
ttg <- read_tsv(ttgfile)
refseq.lengths <- read_tsv('/share/db/refseq/human/refseq.108.rna.lengths', col_names = c('target_id', 'length'))
ttg <- ttg %>% full_join(refseq.lengths, by = 'target_id')
gene_lengths <- ttg %>%
dplyr::select(gene_id,length) %>%
filter(!is.na(gene_id)) %>%
group_by(gene_id) %>%
summarise(median_length = median(length)) %>%
dplyr::select(gene_id, median_length) %>%
spread(gene_id, median_length) %>%
as.list() %>%
unlist()
genes <- rep(0, length(unique(na.omit(ttg$gene_id))))
names(genes) <- unique(na.omit(ttg$gene_id))
genes[as.character(unique(na.omit(signifAdultCA2$gene_id)))] <- 1
pwf <- nullp(DEgenes = genes, genome = NULL, id = NULL, bias.data = gene_lengths)
GO.wall <- goseq(pwf, "hg19", "knownGene")
t1 <- GO.wall %>% filter(over_represented_pvalue < 0.05) %>% group_by(ontology) %>% summarise(n=n())
t2 <- GO.wall %>% filter(ontology == 'BP' & over_represented_pvalue < 0.05 & grepl('(neur[oa])|nerv', term)) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
t3 <- GO.wall %>% filter(ontology == 'MF' & over_represented_pvalue < 0.05) %>% head(n=50) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
t4 <- GO.wall %>% filter(ontology == 'CC' & over_represented_pvalue < 0.05) %>% head(n=50) %>%
mutate(category = paste0('[',category,'](http://amigo.geneontology.org/amigo/term/',category,')')) %>%
dplyr::select(c('category', 'term', 'numDEInCat', 'numInCat', 'over_represented_pvalue'))
Number of significant categories in different ontologies
kable(t1, format = 'markdown')
| ontology | n |
|---|---|
| BP | 1914 |
| CC | 397 |
| MF | 388 |
Summary of biological process terms related to neuronal development
kable(t2, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0007399 | nervous system development | 1081 | 2195 | 0.0000000 |
| GO:0031175 | neuron projection development | 478 | 829 | 0.0000000 |
| GO:0048666 | neuron development | 526 | 974 | 0.0000000 |
| GO:0048812 | neuron projection morphogenesis | 338 | 570 | 0.0000000 |
| GO:0022008 | neurogenesis | 726 | 1443 | 0.0000000 |
| GO:0048699 | generation of neurons | 678 | 1346 | 0.0000000 |
| GO:0010975 | regulation of neuron projection development | 247 | 402 | 0.0000000 |
| GO:0030182 | neuron differentiation | 619 | 1225 | 0.0000000 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 301 | 519 | 0.0000000 |
| GO:0051960 | regulation of nervous system development | 400 | 755 | 0.0000000 |
| GO:0050767 | regulation of neurogenesis | 356 | 667 | 0.0000000 |
| GO:0045664 | regulation of neuron differentiation | 301 | 547 | 0.0000000 |
| GO:0010976 | positive regulation of neuron projection development | 152 | 236 | 0.0000000 |
| GO:0007269 | neurotransmitter secretion | 98 | 138 | 0.0000000 |
| GO:0051962 | positive regulation of nervous system development | 251 | 453 | 0.0000000 |
| GO:0001505 | regulation of neurotransmitter levels | 120 | 184 | 0.0000000 |
| GO:0006836 | neurotransmitter transport | 121 | 186 | 0.0000000 |
| GO:0050769 | positive regulation of neurogenesis | 217 | 392 | 0.0000000 |
| GO:0045666 | positive regulation of neuron differentiation | 181 | 318 | 0.0000000 |
| GO:0007417 | central nervous system development | 434 | 898 | 0.0000000 |
| GO:1990138 | neuron projection extension | 92 | 140 | 0.0000000 |
| GO:0051961 | negative regulation of nervous system development | 135 | 256 | 0.0000001 |
| GO:0050768 | negative regulation of neurogenesis | 124 | 237 | 0.0000004 |
| GO:0048168 | regulation of neuronal synaptic plasticity | 36 | 50 | 0.0000010 |
| GO:0097485 | neuron projection guidance | 118 | 223 | 0.0000010 |
| GO:0007270 | neuron-neuron synaptic transmission | 76 | 134 | 0.0000021 |
| GO:0046928 | regulation of neurotransmitter secretion | 36 | 53 | 0.0000024 |
| GO:0010977 | negative regulation of neuron projection development | 68 | 119 | 0.0000075 |
| GO:0048857 | neural nucleus development | 40 | 62 | 0.0000085 |
| GO:0048169 | regulation of long-term neuronal synaptic plasticity | 19 | 23 | 0.0000095 |
| GO:0051588 | regulation of neurotransmitter transport | 40 | 64 | 0.0000132 |
| GO:0045665 | negative regulation of neuron differentiation | 96 | 184 | 0.0000156 |
| GO:0070997 | neuron death | 147 | 303 | 0.0000199 |
| GO:0021895 | cerebral cortex neuron differentiation | 18 | 22 | 0.0000367 |
| GO:0051402 | neuron apoptotic process | 109 | 218 | 0.0000438 |
| GO:0048791 | calcium ion-regulated exocytosis of neurotransmitter | 21 | 30 | 0.0000513 |
| GO:0043523 | regulation of neuron apoptotic process | 98 | 194 | 0.0000669 |
| GO:1901214 | regulation of neuron death | 130 | 269 | 0.0000713 |
| GO:0060052 | neurofilament cytoskeleton organization | 9 | 9 | 0.0000756 |
| GO:0008038 | neuron recognition | 24 | 34 | 0.0000823 |
| GO:0007272 | ensheathment of neurons | 60 | 112 | 0.0002505 |
| GO:1990089 | response to nerve growth factor | 29 | 46 | 0.0003811 |
| GO:0001764 | neuron migration | 70 | 136 | 0.0004050 |
| GO:0098877 | neurotransmitter receptor transport to plasma membrane | 10 | 11 | 0.0005700 |
| GO:0021954 | central nervous system neuron development | 40 | 70 | 0.0005733 |
| GO:1901216 | positive regulation of neuron death | 38 | 68 | 0.0009069 |
| GO:0031102 | neuron projection regeneration | 29 | 49 | 0.0010986 |
| GO:0098969 | neurotransmitter receptor transport to postsynaptic membrane | 9 | 10 | 0.0012879 |
| GO:1990090 | cellular response to nerve growth factor stimulus | 26 | 43 | 0.0020896 |
| GO:0021626 | central nervous system maturation | 6 | 6 | 0.0023247 |
| GO:0099637 | neurotransmitter receptor transport | 11 | 14 | 0.0028902 |
| GO:0014041 | regulation of neuron maturation | 9 | 11 | 0.0033223 |
| GO:0021952 | central nervous system projection neuron axonogenesis | 15 | 22 | 0.0034720 |
| GO:0072578 | neurotransmitter-gated ion channel clustering | 7 | 8 | 0.0036446 |
| GO:0099639 | neurotransmitter receptor transport, endosome to plasma membrane | 6 | 6 | 0.0038856 |
| GO:0042551 | neuron maturation | 25 | 43 | 0.0044646 |
| GO:0050905 | neuromuscular process | 50 | 100 | 0.0045270 |
| GO:0070570 | regulation of neuron projection regeneration | 16 | 25 | 0.0049648 |
| GO:0043524 | negative regulation of neuron apoptotic process | 65 | 136 | 0.0057711 |
| GO:1901215 | negative regulation of neuron death | 85 | 183 | 0.0066481 |
| GO:0043525 | positive regulation of neuron apoptotic process | 27 | 49 | 0.0073235 |
| GO:0099601 | regulation of neurotransmitter receptor activity | 20 | 34 | 0.0073346 |
| GO:0050885 | neuromuscular process controlling balance | 28 | 52 | 0.0083336 |
| GO:0048170 | positive regulation of long-term neuronal synaptic plasticity | 5 | 5 | 0.0084868 |
| GO:0098887 | neurotransmitter receptor transport, endosome to postsynaptic membrane | 5 | 5 | 0.0095453 |
| GO:0021892 | cerebral cortex GABAergic interneuron differentiation | 9 | 12 | 0.0103254 |
| GO:0021955 | central nervous system neuron axonogenesis | 17 | 28 | 0.0111151 |
| GO:0060384 | innervation | 16 | 25 | 0.0114348 |
| GO:0014042 | positive regulation of neuron maturation | 4 | 4 | 0.0156775 |
| GO:0097118 | neuroligin clustering involved in postsynaptic membrane assembly | 4 | 4 | 0.0162502 |
| GO:0021953 | central nervous system neuron differentiation | 77 | 173 | 0.0204099 |
| GO:0051581 | negative regulation of neurotransmitter uptake | 3 | 3 | 0.0211798 |
| GO:1902284 | neuron projection extension involved in neuron projection guidance | 13 | 21 | 0.0213159 |
| GO:0048172 | regulation of short-term neuronal synaptic plasticity | 10 | 15 | 0.0262543 |
| GO:0021884 | forebrain neuron development | 17 | 31 | 0.0272162 |
| GO:2000178 | negative regulation of neural precursor cell proliferation | 11 | 18 | 0.0278641 |
| GO:0001504 | neurotransmitter uptake | 13 | 23 | 0.0288950 |
| GO:0038179 | neurotrophin signaling pathway | 19 | 34 | 0.0293080 |
| GO:0098881 | exocytic insertion of neurotransmitter receptor to plasma membrane | 4 | 4 | 0.0300200 |
| GO:0098967 | exocytic insertion of neurotransmitter receptor to postsynaptic membrane | 4 | 4 | 0.0300200 |
| GO:0021843 | substrate-independent telencephalic tangential interneuron migration | 7 | 10 | 0.0319943 |
| GO:0021894 | cerebral cortex GABAergic interneuron development | 6 | 8 | 0.0339987 |
| GO:0007422 | peripheral nervous system development | 35 | 72 | 0.0353098 |
| GO:2001224 | positive regulation of neuron migration | 7 | 10 | 0.0362737 |
| GO:0038180 | nerve growth factor signaling pathway | 6 | 8 | 0.0397597 |
| GO:0036480 | neuron intrinsic apoptotic signaling pathway in response to oxidative stress | 6 | 8 | 0.0400716 |
| GO:1903376 | regulation of oxidative stress-induced neuron intrinsic apoptotic signaling pathway | 6 | 8 | 0.0400716 |
| GO:0099072 | regulation of postsynaptic specialization membrane neurotransmitter receptor levels | 5 | 6 | 0.0416252 |
| GO:0001956 | positive regulation of neurotransmitter secretion | 8 | 12 | 0.0463670 |
| GO:0036475 | neuron death in response to oxidative stress | 12 | 20 | 0.0486823 |
Summary of the 50th most significant “molecular functions” terms
kable(t3, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0005515 | protein binding | 4443 | 10836 | 0e+00 |
| GO:0005488 | binding | 5463 | 14089 | 0e+00 |
| GO:0003723 | RNA binding | 767 | 1596 | 0e+00 |
| GO:0019899 | enzyme binding | 846 | 1816 | 0e+00 |
| GO:0003824 | catalytic activity | 2354 | 5720 | 0e+00 |
| GO:0008092 | cytoskeletal protein binding | 419 | 844 | 0e+00 |
| GO:0016740 | transferase activity | 1017 | 2366 | 0e+00 |
| GO:1901265 | nucleoside phosphate binding | 910 | 2077 | 0e+00 |
| GO:0000166 | nucleotide binding | 909 | 2076 | 0e+00 |
| GO:0032553 | ribonucleotide binding | 816 | 1855 | 0e+00 |
| GO:0035639 | purine ribonucleoside triphosphate binding | 790 | 1795 | 0e+00 |
| GO:0017076 | purine nucleotide binding | 812 | 1851 | 0e+00 |
| GO:0032555 | purine ribonucleotide binding | 806 | 1838 | 0e+00 |
| GO:0036094 | small molecule binding | 1019 | 2378 | 0e+00 |
| GO:0043168 | anion binding | 1120 | 2650 | 0e+00 |
| GO:0016772 | transferase activity, transferring phosphorus-containing groups | 451 | 983 | 0e+00 |
| GO:0016301 | kinase activity | 389 | 842 | 0e+00 |
| GO:0043167 | ion binding | 2377 | 6030 | 0e+00 |
| GO:0016773 | phosphotransferase activity, alcohol group as acceptor | 358 | 774 | 0e+00 |
| GO:0019901 | protein kinase binding | 262 | 540 | 0e+00 |
| GO:0019900 | kinase binding | 292 | 611 | 0e+00 |
| GO:0044877 | macromolecular complex binding | 417 | 911 | 0e+00 |
| GO:0015631 | tubulin binding | 152 | 285 | 0e+00 |
| GO:0097367 | carbohydrate derivative binding | 922 | 2188 | 0e+00 |
| GO:0019904 | protein domain specific binding | 312 | 660 | 0e+00 |
| GO:0030554 | adenyl nucleotide binding | 654 | 1511 | 0e+00 |
| GO:0016817 | hydrolase activity, acting on acid anhydrides | 384 | 833 | 0e+00 |
| GO:0016818 | hydrolase activity, acting on acid anhydrides, in phosphorus-containing anhydrides | 383 | 831 | 0e+00 |
| GO:0004674 | protein serine/threonine kinase activity | 218 | 445 | 0e+00 |
| GO:0032559 | adenyl ribonucleotide binding | 648 | 1500 | 0e+00 |
| GO:0004672 | protein kinase activity | 299 | 640 | 0e+00 |
| GO:0016462 | pyrophosphatase activity | 381 | 829 | 0e+00 |
| GO:0005524 | ATP binding | 632 | 1464 | 0e+00 |
| GO:0060589 | nucleoside-triphosphatase regulator activity | 172 | 348 | 1e-07 |
| GO:0019905 | syntaxin binding | 53 | 83 | 1e-07 |
| GO:0008047 | enzyme activator activity | 238 | 502 | 1e-07 |
| GO:0003924 | GTPase activity | 144 | 273 | 1e-07 |
| GO:1901363 | heterocyclic compound binding | 2278 | 5812 | 1e-07 |
| GO:0017111 | nucleoside-triphosphatase activity | 360 | 789 | 1e-07 |
| GO:0044769 | ATPase activity, coupled to transmembrane movement of ions, rotational mechanism | 26 | 31 | 2e-07 |
| GO:0045296 | cadherin binding | 154 | 304 | 3e-07 |
| GO:0097159 | organic cyclic compound binding | 2306 | 5896 | 4e-07 |
| GO:0003779 | actin binding | 193 | 399 | 5e-07 |
| GO:0003954 | NADH dehydrogenase activity | 30 | 38 | 5e-07 |
| GO:0008137 | NADH dehydrogenase (ubiquinone) activity | 30 | 38 | 5e-07 |
| GO:0050136 | NADH dehydrogenase (quinone) activity | 30 | 38 | 5e-07 |
| GO:0030695 | GTPase regulator activity | 151 | 310 | 7e-07 |
| GO:0050839 | cell adhesion molecule binding | 214 | 451 | 7e-07 |
| GO:0017075 | syntaxin-1 binding | 17 | 19 | 8e-07 |
| GO:0008022 | protein C-terminus binding | 98 | 178 | 1e-06 |
Summary of the 50th most significant “cellular component” terms
kable(t4, format = 'markdown')
| category | term | numDEInCat | numInCat | over_represented_pvalue |
|---|---|---|---|---|
| GO:0044424 | intracellular part | 5573 | 13845 | 0 |
| GO:0005737 | cytoplasm | 4598 | 10995 | 0 |
| GO:0005622 | intracellular | 5658 | 14144 | 0 |
| GO:0044444 | cytoplasmic part | 3881 | 9072 | 0 |
| GO:0097458 | neuron part | 755 | 1307 | 0 |
| GO:0043005 | neuron projection | 576 | 949 | 0 |
| GO:0044446 | intracellular organelle part | 3636 | 8609 | 0 |
| GO:0043229 | intracellular organelle | 4875 | 12081 | 0 |
| GO:0044422 | organelle part | 3703 | 8817 | 0 |
| GO:0045202 | synapse | 494 | 810 | 0 |
| GO:0043231 | intracellular membrane-bounded organelle | 4257 | 10390 | 0 |
| GO:0044456 | synapse part | 419 | 675 | 0 |
| GO:0043227 | membrane-bounded organelle | 4853 | 12109 | 0 |
| GO:0043226 | organelle | 5179 | 13051 | 0 |
| GO:0005829 | cytosol | 2133 | 4749 | 0 |
| GO:0030424 | axon | 255 | 385 | 0 |
| GO:0036477 | somatodendritic compartment | 387 | 656 | 0 |
| GO:0032991 | macromolecular complex | 2071 | 4717 | 0 |
| GO:0031090 | organelle membrane | 1298 | 2800 | 0 |
| GO:0120025 | plasma membrane bounded cell projection | 858 | 1729 | 0 |
| GO:0042995 | cell projection | 899 | 1837 | 0 |
| GO:0044464 | cell part | 6133 | 16128 | 0 |
| GO:0098794 | postsynapse | 255 | 407 | 0 |
| GO:0005623 | cell | 6138 | 16154 | 0 |
| GO:0030425 | dendrite | 280 | 462 | 0 |
| GO:0031975 | envelope | 573 | 1121 | 0 |
| GO:0031967 | organelle envelope | 572 | 1120 | 0 |
| GO:0044428 | nuclear part | 1826 | 4222 | 0 |
| GO:1902494 | catalytic complex | 644 | 1284 | 0 |
| GO:0031981 | nuclear lumen | 1676 | 3866 | 0 |
| GO:0043209 | myelin sheath | 123 | 167 | 0 |
| GO:0098793 | presynapse | 213 | 343 | 0 |
| GO:0005739 | mitochondrion | 787 | 1657 | 0 |
| GO:0031974 | membrane-enclosed lumen | 2066 | 4889 | 0 |
| GO:0043233 | organelle lumen | 2066 | 4889 | 0 |
| GO:0070013 | intracellular organelle lumen | 2066 | 4889 | 0 |
| GO:0005654 | nucleoplasm | 1433 | 3281 | 0 |
| GO:0044429 | mitochondrial part | 488 | 970 | 0 |
| GO:0005740 | mitochondrial envelope | 372 | 703 | 0 |
| GO:0030054 | cell junction | 585 | 1192 | 0 |
| GO:0098800 | inner mitochondrial membrane protein complex | 89 | 112 | 0 |
| GO:0033267 | axon part | 123 | 175 | 0 |
| GO:0019866 | organelle inner membrane | 292 | 531 | 0 |
| GO:0005743 | mitochondrial inner membrane | 265 | 472 | 0 |
| GO:0044297 | cell body | 271 | 483 | 0 |
| GO:0098798 | mitochondrial protein complex | 99 | 134 | 0 |
| GO:0098796 | membrane protein complex | 345 | 641 | 0 |
| GO:0098984 | neuron to neuron synapse | 134 | 201 | 0 |
| GO:0044463 | cell projection part | 499 | 1002 | 0 |
| GO:0044455 | mitochondrial membrane part | 127 | 188 | 0 |
myttg <- ttg %>% filter(target_id %in% devAdultCA2) %>% dplyr::select(c('target_id', 'symbol'))
symbols <- myttg$symbol
names(symbols) <- myttg$target_id
sel_vars <- metadata2 %>%
filter(age == 'Adult' | grepl('CP', sample)) %>%
dplyr::select('sample') %>%
as.list() %>% unlist() %>% as.vector()
obs <-
counts(ddsWald2, normalized=TRUE) %>%
# assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% devAdultCA2) %>%
dplyr::select(target_id, sel_vars) %>%
gather(sel_vars, key = 'sample', value = 'est_counts') %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="sample") %>%
as.matrix()
colnames(obs) <- symbols[colnames(obs)]
labels <- metadata2 %>%
filter(age == 'Adult' | grepl('CP', sample)) %>%
dplyr::select(c('sample','age')) %>%
arrange(sample) %>%
remove_rownames() %>%
column_to_rownames(var='sample')
Rowv <- obs %>%
dist(method="euclidean") %>%
hclust(method = "complete") %>%
as.dendrogram %>%
# rotate(c(6:7,1:5)) %>%
color_branches(k = 3) %>%
set("branches_lwd", 4)
obs[which(obs == 0)] <- 1
par(mar=c(5,4,4,2)+0.1)
heatmap.2(obs, Rowv = Rowv, trace="none", margins=c(5,9), srtCol = 45,
symm = F, symkey = F, symbreaks = F, scale="none")
This correlation plot uses all the transcripts annotated as PROVISIONAL, REVIEWED or VALIDATED in RefSeq (the high confidence categories)
cor_tlist <- ttg %>% filter(status %in% c('PROVISIONAL', 'REVIEWED', 'VALIDATED')) %>% .[['target_id']]
obs <-
counts(ddsWald2, normalized = TRUE) %>%
# assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% cor_tlist) %>%
gather(colnames(assay(rld2)), key = 'sample', value = 'est_counts') %>%
full_join(metadata2, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = mean(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), any_vars(. >= 0.5))
colnames(obs) <- metadata2 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
This correlation plot uses the ‘significant’ transcripts, which are DE between Fetal and Control samples and also between Autism and Control samples.
obs <-
counts(ddsWald2, normalized = TRUE) %>%
# assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% devAdultCAup2) %>%
gather(colnames(assay(ddsWald2)), key = 'sample', value = 'est_counts') %>%
full_join(metadata2, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = mean(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Adult')), any_vars(. >= 0.5))
colnames(obs) <- metadata2 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
#cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)
obs <-
counts(ddsWald2, normalized=TRUE) %>%
# assay(rld2) %>%
as.data.frame() %>%
rownames_to_column('target_id') %>%
filter(target_id %in% devAdultCAdown2) %>%
gather(colnames(assay(ddsWald2)), key = 'sample', value = 'est_counts') %>%
full_join(metadata2, by = 'sample') %>%
dplyr::select(c('grp','target_id','est_counts')) %>%
group_by(grp, target_id) %>%
summarise(est_counts = mean(est_counts)) %>%
spread(target_id, est_counts) %>%
remove_rownames() %>%
column_to_rownames(var="grp") %>%
t() %>%
as.tibble() %>%
filter_at(vars(matches('Child|Adult')), any_vars(. >= 0.5))
colnames(obs) <- metadata2 %>%
dplyr::select(grp) %>%
group_by(grp) %>%
summarise(group_count=n()) %>%
mutate(group = paste0(grp,'(',group_count,')')) %>%
.[['group']]
colpal=brewer.pal(n=8, name="RdBu")
corrplot(cor(obs),
method="circle",
type="lower",
col=colpal,
#cl.lim=c(0,1),
number.cex = 0.6,
addCoef.col = "black",
tl.col="black",
tl.cex = 0.75,
tl.srt=45,
diag=FALSE)