library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(cowplot)
library(metan)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## |=========================================================|
## | Multi-Environment Trial Analysis (metan) v1.18.0 |
## | Author: Tiago Olivoto |
## | Type 'citation('metan')' to know how to cite metan |
## | Type 'vignette('metan_start')' for a short tutorial |
## | Visit 'https://bit.ly/pkgmetan' for a complete tutorial |
## |=========================================================|
##
## Attaching package: 'metan'
## The following objects are masked from 'package:tibble':
##
## column_to_rownames, remove_rownames, rownames_to_column
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:dplyr':
##
## recode_factor
# Load the CIBERSORTx results table and list the columns it provides.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
data <- read.csv(
  file = "/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv"
)
names(data)
## [1] "Mixture" "AGE" "Mid.age"
## [4] "SEX" "DTHHRDY" "VSMC_II"
## [7] "VSMC_I" "Endothelial_I" "Fibroblast_I"
## [10] "Endothelial_II" "Pericyte" "X_"
## [13] "Macrophage" "Lymphatic_Endothelial" "Lymphocyte"
## [16] "Neuron" "Fibroblast_II" "Mesothelial"
## [19] "P.value" "Correlation" "RMSE"
# Attach a random permutation of the row indices as a dummy covariate.
# The length is taken from the data instead of being hard-coded (was 1:432),
# so the assignment keeps working when the number of rows in the input changes.
# NOTE(review): no seed is set, so the permutation differs between runs; call
# set.seed() beforehand if the downstream numbers must be reproducible.
data$fake <- sample.int(nrow(data))
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Ordinary least-squares fit of the VSMC_I fraction on the random `fake`
# covariate and SEX; the model summary is printed for inspection.
fit <- lm(formula = VSMC_I ~ fake + SEX, data = data)
print(summary(fit))
##
## Call:
## lm(formula = VSMC_I ~ fake + SEX, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26709 -0.06987 0.00008 0.07586 0.34998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.749e-01 1.896e-02 14.501 <2e-16 ***
## fake 1.225e-05 4.332e-05 0.283 0.778
## SEX -1.003e-02 1.130e-02 -0.888 0.375
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1122 on 429 degrees of freedom
## Multiple R-squared: 0.002055, Adjusted R-squared: -0.002598
## F-statistic: 0.4416 on 2 and 429 DF, p-value: 0.6433
# Store the predictions of `fit` for every row of `data` (the same rows the
# model was fitted on). The model includes SEX as a covariate, so the
# predictions reflect its estimated effect.
data[["predict_vsmc_i"]] <- predict(object = fit, newdata = data)
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
# Simpler view of the raw data: VSMC_I against the `fake` covariate, one panel
# per AGE bracket, each with a regression line and the correlation coefficient
# (R and p-value). Nothing here is adjusted for sex.
ggscatter(
  data,
  x = "fake",
  y = "VSMC_I",
  add = "reg.line",
  cor.coef = TRUE,
  facet.by = "AGE"
)
# Per-AGE panels of VSMC_I against `fake`, overlaid with the predictions of
# `fit` (which includes SEX as a covariate) and a per-panel linear smooth.
# The outer parentheses print the plot as well as assigning it to `mm_plot`.
(mm_plot <- ggplot(data, aes(x = fake, y = VSMC_I, colour = AGE)) +
  facet_wrap(~AGE, nrow = 2) + # one panel per AGE bracket
  geom_point(alpha = 0.5) +
  theme_classic() +
  # predictions from `fit`; `linewidth` replaces the `size` argument that
  # ggplot2 3.4.0 deprecated for line geoms
  geom_line(
    data = cbind(data, pred = predict(fit)),
    aes(y = pred),
    linewidth = 1
  ) +
  # straight-line fit of the raw points within each panel (separate from `fit`);
  # the formula is spelled out so geom_smooth() does not print its default
  geom_smooth(method = "lm", formula = y ~ x, colour = "orange", se = FALSE) +
  theme(
    legend.position = "none",
    panel.spacing = unit(2, "lines") # space between panels
  )
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
# List the columns of `data` again.
names(data)
## [1] "Mixture" "AGE" "Mid.age"
## [4] "SEX" "DTHHRDY" "VSMC_II"
## [7] "VSMC_I" "Endothelial_I" "Fibroblast_I"
## [10] "Endothelial_II" "Pericyte" "X_"
## [13] "Macrophage" "Lymphatic_Endothelial" "Lymphocyte"
## [16] "Neuron" "Fibroblast_II" "Mesothelial"
## [19] "P.value" "Correlation" "RMSE"
## [22] "fake" "predict_vsmc_i"
# Number of samples in each AGE bracket.
table(data[["AGE"]])
##
## 20-29 30-39 40-49 50-59 60-69 70-79
## 37 38 68 149 131 9
# Bar chart of the per-bracket sample counts.
barplot(height = table(data[["AGE"]]))
# Reshape to long format: one row per sample and cell-type column, with the
# column name stored in `Cell_Type` and its value in `Fraction`.
data_long <- pivot_longer(
  data,
  cols = c(
    "VSMC_II", "VSMC_I", "Endothelial_I", "Fibroblast_I", "Endothelial_II",
    "X_", "Pericyte", "Macrophage", "Lymphatic_Endothelial", "Lymphocyte",
    "Neuron", "Fibroblast_II", "Mesothelial"
  ),
  names_to = "Cell_Type",
  values_to = "Fraction"
)
# Stacked bars of the mean fraction of every cell type within each AGE bracket.
p <- data_long %>%
  group_by(AGE, Cell_Type) %>%
  # `.groups = "drop"` returns an ungrouped result directly and silences the
  # summarise() regrouping message
  summarise(mf = mean(Fraction), .groups = "drop") %>%
  # AGE is mapped to y and the coordinates are flipped below, so AGE ends up on
  # the horizontal axis and the stacked mean fractions on the vertical one
  ggplot(aes(y = AGE, x = mf, fill = Cell_Type)) +
  geom_col() +
  # The "Paired" palette has only 12 colours, one fewer than the 13 cell types
  # pivoted into `data_long`, so scale_fill_brewer() could not colour them all.
  # Interpolating the same palette yields exactly as many colours as needed.
  scale_fill_manual(
    values = grDevices::colorRampPalette(
      grDevices::palette.colors(palette = "Paired")
    )(length(unique(data_long$Cell_Type)))
  ) +
  labs(y = "age", x = "Fraction of Cell Types") +
  theme_minimal() +
  coord_flip()
## `summarise()` has grouped output by 'AGE'. You can override using the `.groups`
## argument.
# Render the stacked-bar plot.
print(p)
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#ggplotly(p)
##CHI-SQUARED TEST - to examine the difference between the categorical variables from a random sample in order to judge the goodness of fit between expected and observed.
##BOX-WHISKER PLOTS FOR THE DIFFERENT CELL TYPES ##Total cell types = 13 (12 named cell types plus the unknown X_ column)
# Draw the distribution of one cell-type fraction across the AGE brackets.
#
# cell_type: name (string) of the column of `df` to put on the y axis.
# jitter:    if TRUE, overlay the individual samples as small khaki points.
# df:        data frame holding AGE and the cell-type columns; defaults to the
#            script-level `data`.
#
# Returns a ggplot object: one box per AGE bracket, a red line joining the
# bracket medians and a blue point at each bracket mean. `aes(group = 1)` puts
# all brackets into a single group so the medians can be joined by one line.
age_boxplot <- function(cell_type, jitter = FALSE, df = data) {
  panel <- ggplot(df, aes(x = AGE, y = .data[[cell_type]])) +
    geom_boxplot() +
    theme_classic() +
    stat_summary(fun = median, geom = "line", aes(group = 1), col = "red") +
    stat_summary(fun = mean, geom = "point", aes(group = 1), col = "blue") +
    # keep the plain column name as the axis title
    labs(y = cell_type)
  if (jitter) {
    panel <- panel + geom_jitter(color = "khaki", size = 0.4, alpha = 0.9)
  }
  panel
}

# Box plots without the individual samples.
# NOTE(review): the one-letter names are kept because code outside this section
# may refer to them, but `c` shares its name with base R's c() and, further
# down, `F` and `I` replace base R's F (FALSE) and I().
a <- age_boxplot("VSMC_I")
b <- age_boxplot("VSMC_II")
c <- age_boxplot("Pericyte")
d <- age_boxplot("Neuron")
e <- age_boxplot("Mesothelial")
f <- age_boxplot("Macrophage")
g <- age_boxplot("Lymphocyte")
h <- age_boxplot("Lymphatic_Endothelial")
i <- age_boxplot("Fibroblast_II")
j <- age_boxplot("Fibroblast_I")
k <- age_boxplot("Endothelial_I")
l <- age_boxplot("Endothelial_II")
m <- age_boxplot("X_")
# 13 panels need 13 labels: the label for the last panel (X_) was missing here,
# although the jittered grid below already had it.
plot_grid(
  a, b, c, d, e, f, g, h, i, j, k, l, m,
  labels = c(
    "VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
    "Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
    "Endothelial_I", "Endothelial_II", "Unknown - X_"
  )
)

# The same box plots with every sample drawn on top as a jittered point.
A <- age_boxplot("VSMC_I", jitter = TRUE)
B <- age_boxplot("VSMC_II", jitter = TRUE)
C <- age_boxplot("Pericyte", jitter = TRUE)
D <- age_boxplot("Neuron", jitter = TRUE)
E <- age_boxplot("Mesothelial", jitter = TRUE)
F <- age_boxplot("Macrophage", jitter = TRUE)
G <- age_boxplot("Lymphocyte", jitter = TRUE)
H <- age_boxplot("Lymphatic_Endothelial", jitter = TRUE)
I <- age_boxplot("Fibroblast_II", jitter = TRUE)
J <- age_boxplot("Fibroblast_I", jitter = TRUE)
K <- age_boxplot("Endothelial_I", jitter = TRUE)
L <- age_boxplot("Endothelial_II", jitter = TRUE)
M <- age_boxplot("X_", jitter = TRUE)
plot_grid(
  A, B, C, D, E, F, G, H, I, J, K, L, M,
  labels = c(
    "VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
    "Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
    "Endothelial_I", "Endothelial_II", "Unknown - X_"
  )
)
# Correlate the numeric bracket mid-point age with every cell-type fraction
# (and the fractions with each other).
# NOTE(review): cor() warns that a standard deviation is zero, so some of the
# selected columns appear to be constant and their correlations come back as
# NA; consider dropping those columns before calling corr_coef().
# NOTE(review): the result is bound to `all`, which is also the name of base
# R's all().
data[["Mid.age"]] <- as.numeric(data[["Mid.age"]])
corr_with_age <- select(
  data,
  "Mid.age", "VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial",
  "Macrophage", "Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II",
  "Fibroblast_I", "Endothelial_I", "Endothelial_II", "X_"
)
all <- corr_coef(corr_with_age, use = "complete.obs")
## Warning in cor(x, method = method, use = use): the standard deviation is zero
# Show the correlation matrix stored in the corr_coef() result.
print(all$cor)
## Mid.age VSMC_I VSMC_II Pericyte Neuron
## Mid.age 1.000000000 -0.09660161 0.16519470 -0.10154418 NA
## VSMC_I -0.096601608 1.00000000 -0.37309085 -0.14237406 NA
## VSMC_II 0.165194697 -0.37309085 1.00000000 -0.25727078 NA
## Pericyte -0.101544185 -0.14237406 -0.25727078 1.00000000 NA
## Neuron NA NA NA NA 1
## Mesothelial -0.024306403 -0.26214995 -0.06951525 0.05979972 NA
## Macrophage -0.017745666 -0.50936046 -0.24824351 0.14491329 NA
## Lymphocyte NA NA NA NA NA
## Lymphatic_Endothelial NA NA NA NA NA
## Fibroblast_II 0.009324993 -0.11355975 -0.07956444 0.02664862 NA
## Fibroblast_I 0.005710435 -0.29492667 -0.16956977 0.17811322 NA
## Endothelial_I -0.046695755 -0.17054165 -0.11614072 0.43101771 NA
## Endothelial_II -0.009942640 -0.65988312 -0.31583644 0.21625842 NA
## X_ NA NA NA NA NA
## Mesothelial Macrophage Lymphocyte Lymphatic_Endothelial
## Mid.age -0.02430640 -0.01774567 NA NA
## VSMC_I -0.26214995 -0.50936046 NA NA
## VSMC_II -0.06951525 -0.24824351 NA NA
## Pericyte 0.05979972 0.14491329 NA NA
## Neuron NA NA NA NA
## Mesothelial 1.00000000 0.17131878 NA NA
## Macrophage 0.17131878 1.00000000 NA NA
## Lymphocyte NA NA 1 NA
## Lymphatic_Endothelial NA NA NA 1
## Fibroblast_II 0.26441047 0.09421798 NA NA
## Fibroblast_I 0.09461708 0.30493560 NA NA
## Endothelial_I -0.01776730 0.02433995 NA NA
## Endothelial_II 0.27719401 0.33962552 NA NA
## X_ NA NA NA NA
## Fibroblast_II Fibroblast_I Endothelial_I Endothelial_II
## Mid.age 0.009324993 0.005710435 -0.046695755 -0.00994264
## VSMC_I -0.113559753 -0.294926674 -0.170541646 -0.65988312
## VSMC_II -0.079564438 -0.169569769 -0.116140723 -0.31583644
## Pericyte 0.026648616 0.178113222 0.431017712 0.21625842
## Neuron NA NA NA NA
## Mesothelial 0.264410466 0.094617076 -0.017767301 0.27719401
## Macrophage 0.094217981 0.304935604 0.024339946 0.33962552
## Lymphocyte NA NA NA NA
## Lymphatic_Endothelial NA NA NA NA
## Fibroblast_II 1.000000000 0.308055667 -0.005620832 0.13275796
## Fibroblast_I 0.308055667 1.000000000 0.082074297 0.25757036
## Endothelial_I -0.005620832 0.082074297 1.000000000 0.19740830
## Endothelial_II 0.132757964 0.257570355 0.197408300 1.00000000
## X_ NA NA NA NA
## X_
## Mid.age NA
## VSMC_I NA
## VSMC_II NA
## Pericyte NA
## Neuron NA
## Mesothelial NA
## Macrophage NA
## Lymphocyte NA
## Lymphatic_Endothelial NA
## Fibroblast_II NA
## Fibroblast_I NA
## Endothelial_I NA
## Endothelial_II NA
## X_ 1
# Overwrite every undefined (NA) correlation with 0, then show the matrix.
# NOTE(review): 0 here is a placeholder, not an estimated correlation;
# presumably it is needed so the heat map below can be drawn - confirm.
all$cor <- replace(all$cor, is.na(all$cor), 0)
print(all$cor)
## Mid.age VSMC_I VSMC_II Pericyte Neuron
## Mid.age 1.000000000 -0.09660161 0.16519470 -0.10154418 0
## VSMC_I -0.096601608 1.00000000 -0.37309085 -0.14237406 0
## VSMC_II 0.165194697 -0.37309085 1.00000000 -0.25727078 0
## Pericyte -0.101544185 -0.14237406 -0.25727078 1.00000000 0
## Neuron 0.000000000 0.00000000 0.00000000 0.00000000 1
## Mesothelial -0.024306403 -0.26214995 -0.06951525 0.05979972 0
## Macrophage -0.017745666 -0.50936046 -0.24824351 0.14491329 0
## Lymphocyte 0.000000000 0.00000000 0.00000000 0.00000000 0
## Lymphatic_Endothelial 0.000000000 0.00000000 0.00000000 0.00000000 0
## Fibroblast_II 0.009324993 -0.11355975 -0.07956444 0.02664862 0
## Fibroblast_I 0.005710435 -0.29492667 -0.16956977 0.17811322 0
## Endothelial_I -0.046695755 -0.17054165 -0.11614072 0.43101771 0
## Endothelial_II -0.009942640 -0.65988312 -0.31583644 0.21625842 0
## X_ 0.000000000 0.00000000 0.00000000 0.00000000 0
## Mesothelial Macrophage Lymphocyte Lymphatic_Endothelial
## Mid.age -0.02430640 -0.01774567 0 0
## VSMC_I -0.26214995 -0.50936046 0 0
## VSMC_II -0.06951525 -0.24824351 0 0
## Pericyte 0.05979972 0.14491329 0 0
## Neuron 0.00000000 0.00000000 0 0
## Mesothelial 1.00000000 0.17131878 0 0
## Macrophage 0.17131878 1.00000000 0 0
## Lymphocyte 0.00000000 0.00000000 1 0
## Lymphatic_Endothelial 0.00000000 0.00000000 0 1
## Fibroblast_II 0.26441047 0.09421798 0 0
## Fibroblast_I 0.09461708 0.30493560 0 0
## Endothelial_I -0.01776730 0.02433995 0 0
## Endothelial_II 0.27719401 0.33962552 0 0
## X_ 0.00000000 0.00000000 0 0
## Fibroblast_II Fibroblast_I Endothelial_I Endothelial_II
## Mid.age 0.009324993 0.005710435 -0.046695755 -0.00994264
## VSMC_I -0.113559753 -0.294926674 -0.170541646 -0.65988312
## VSMC_II -0.079564438 -0.169569769 -0.116140723 -0.31583644
## Pericyte 0.026648616 0.178113222 0.431017712 0.21625842
## Neuron 0.000000000 0.000000000 0.000000000 0.00000000
## Mesothelial 0.264410466 0.094617076 -0.017767301 0.27719401
## Macrophage 0.094217981 0.304935604 0.024339946 0.33962552
## Lymphocyte 0.000000000 0.000000000 0.000000000 0.00000000
## Lymphatic_Endothelial 0.000000000 0.000000000 0.000000000 0.00000000
## Fibroblast_II 1.000000000 0.308055667 -0.005620832 0.13275796
## Fibroblast_I 0.308055667 1.000000000 0.082074297 0.25757036
## Endothelial_I -0.005620832 0.082074297 1.000000000 0.19740830
## Endothelial_II 0.132757964 0.257570355 0.197408300 1.00000000
## X_ 0.000000000 0.000000000 0.000000000 0.00000000
## X_
## Mid.age 0
## VSMC_I 0
## VSMC_II 0
## Pericyte 0
## Neuron 0
## Mesothelial 0
## Macrophage 0
## Lymphocyte 0
## Lymphatic_Endothelial 0
## Fibroblast_II 0
## Fibroblast_I 0
## Endothelial_I 0
## Endothelial_II 0
## X_ 1
#image(all, main = "Correlation Matrix Heatmap")
library(pheatmap)
# Heat map of the correlation matrix with each cell's value printed in it.
pheatmap(mat = all$cor, display_numbers = TRUE)