library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(cowplot)
library(metan)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## |=========================================================|
## | Multi-Environment Trial Analysis (metan) v1.18.0 |
## | Author: Tiago Olivoto |
## | Type 'citation('metan')' to know how to cite metan |
## | Type 'vignette('metan_start')' for a short tutorial |
## | Visit 'https://bit.ly/pkgmetan' for a complete tutorial |
## |=========================================================|
##
## Attaching package: 'metan'
## The following objects are masked from 'package:tibble':
##
## column_to_rownames, remove_rownames, rownames_to_column
## The following object is masked from 'package:tidyr':
##
## replace_na
## The following object is masked from 'package:dplyr':
##
## recode_factor
data <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv")
colnames(data)
## [1] "Mixture" "AGE" "Mid.age"
## [4] "exact_age" "SEX" "DTHHRDY"
## [7] "VSMC_II" "VSMC_I" "Endothelial_I"
## [10] "Fibroblast_I" "Endothelial_II" "Pericyte"
## [13] "X_" "Macrophage" "Lymphatic_Endothelial"
## [16] "Lymphocyte" "Neuron" "Fibroblast_II"
## [19] "Mesothelial" "P.value" "Correlation"
## [22] "RMSE"
cibersort_results <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv")
library(tidyr)
# Assuming your data frame is named 'data'
long_data <- data %>%
pivot_longer(cols = matches('VSMC_I|VSMC_II|Pericyte|Neuron|Mesothelial|Macrophage|Lymphocyte|Lymphatic_Endothelial|Fibroblast_II|Fibroblast_I|Endothelial_I|Endothelial_II|X_'),
names_to = "Cell_Type",
values_to = "Cell_Count")
# Display the resulting long-format data
#print(long_data)
##linear modelling with age and sex separately
library(RegParallel)
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following object is masked from 'package:metan':
##
## :=
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Loading required package: stringr
## Loading required package: survival
## Loading required package: arm
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:metan':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: lme4
##
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is /home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results
# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")
# Initialize a list to store the model results
macro_res_list <- list()
# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
formula <- paste(ctypes, '~([*])')
# Note: Replace 'as.numeric([*])' with the actual predictor variable
# you want to use. I've used a placeholder here.
macro_res <- RegParallel(
data = cibersort_results,
formula = formula,
FUN = function(formula, data)
lm(formula = formula, data = data),
FUNtype = 'lm',
variables = colnames(data)[4:5],
blocksize = 2,
p.adjust = "BH",
cores = 2,
nestedParallel = FALSE,
conflevel = 95
)
# Store the results in the list
macro_res_list[[ctypes]] <- macro_res
}
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_I
## First 5 formulae:
## -- VSMC_I ~ (exact_age)
## -- VSMC_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ (exact_age)
## -- VSMC_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Pericyte
## First 5 formulae:
## -- Pericyte ~ (exact_age)
## -- Pericyte ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Mesothelial
## First 5 formulae:
## -- Mesothelial ~ (exact_age)
## -- Mesothelial ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Macrophage
## First 5 formulae:
## -- Macrophage ~ (exact_age)
## -- Macrophage ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_II
## First 5 formulae:
## -- Fibroblast_II ~ (exact_age)
## -- Fibroblast_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_I
## First 5 formulae:
## -- Fibroblast_I ~ (exact_age)
## -- Fibroblast_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_I
## First 5 formulae:
## -- Endothelial_I ~ (exact_age)
## -- Endothelial_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_II
## First 5 formulae:
## -- Endothelial_II ~ (exact_age)
## -- Endothelial_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
lm_results_age <- dplyr::bind_rows(macro_res_list, .id ="celltypes")
##i couldn’t find the relationship of any of cell type with sex but could find relationship with age.
library(NatParksPalettes)
fit = lm(VSMC_II ~ exact_age + SEX, data=cibersort_results)
summary(fit)
##
## Call:
## lm(formula = VSMC_II ~ exact_age + SEX, data = cibersort_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.239222 -0.054839 0.000006 0.055041 0.307200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3978720 0.0203168 19.583 < 2e-16 ***
## exact_age 0.0011296 0.0003075 3.674 0.000269 ***
## SEX 0.0081592 0.0082200 0.993 0.321463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08164 on 429 degrees of freedom
## Multiple R-squared: 0.03204, Adjusted R-squared: 0.02752
## F-statistic: 7.099 on 2 and 429 DF, p-value: 0.0009261
ggplot(data = cibersort_results, aes(x = exact_age, y = VSMC_II, group_by(AGE), color= AGE, fill = AGE)) +
#facet_wrap(~leadsnp, nrow=2) +
geom_jitter() +
# geom_boxplot() +
#geom_smooth(method = "lm") +
geom_abline(slope = coef(fit)[2], intercept = coef(fit)[1], color = "red") +
theme_classic() +
scale_fill_manual(values = natparks.pals("Yellowstone", 6)) +
labs(y= "Vascular Smooth Muscle Cells II", x = "Age")
fit2 = lm(VSMC_I ~ exact_age + SEX, data=cibersort_results)
summary(fit2)
##
## Call:
## lm(formula = VSMC_I ~ exact_age + SEX, data = cibersort_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28374 -0.06546 0.00149 0.07770 0.35864
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3219891 0.0278091 11.579 <2e-16 ***
## exact_age -0.0008242 0.0004209 -1.958 0.0509 .
## SEX -0.0110466 0.0112513 -0.982 0.3268
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1117 on 429 degrees of freedom
## Multiple R-squared: 0.01071, Adjusted R-squared: 0.0061
## F-statistic: 2.323 on 2 and 429 DF, p-value: 0.09926
ggplot(data = cibersort_results, aes(x = exact_age, y = VSMC_I)) +
geom_jitter() +
geom_abline(slope = coef(fit2)[2], intercept = coef(fit2)[1], color = "red") +
theme_classic() +
scale_fill_manual(values = natparks.pals("Yellowstone", 6)) +
labs(y = "Vascular Smooth Muscle Cells I", x = "Age")
vsmc_1 <- ggplot(data, aes(x = exact_age, y = VSMC_I)) +
geom_point(color = "pink") +
geom_smooth(method = "lm", color = "darkmagenta") + # Add a linear regression line without confidence intervals
labs(x = "Age",
y = "Vascular Smooth Muscle cells I",
size = 20) +
theme_classic() +
coord_cartesian(ylim = c(0,0.8))
# Assuming you already have 'p' as your ggplot object
vsmc_1
## `geom_smooth()` using formula = 'y ~ x'
ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/agevsVSMCI.png", vsmc_1, width = 5, height = 4, dpi = 800)
## `geom_smooth()` using formula = 'y ~ x'
vsmc_2 <- ggplot(data, aes(x = exact_age, y = VSMC_II)) +
geom_point(color = "pink") +
geom_smooth(method = "lm", color = "darkmagenta") + # Add a linear regression line without confidence intervals
labs(x = "Age",
y = "Vascular Smooth Muscle cells II",
size = 20) +
theme_classic() +
coord_cartesian(ylim = c(0,0.8))
vsmc_2
## `geom_smooth()` using formula = 'y ~ x'
ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/agevsVSMCII.png", vsmc_2, width = 5, height = 4, dpi = 800)
## `geom_smooth()` using formula = 'y ~ x'
##Linear modelling with both age and sex
# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")
# Initialize a list to store the model results
macro_res_list <- list()
# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
formula <- paste(ctypes, '~exact_age + SEX')
# Note: Replace 'as.numeric([*])' with the actual predictor variable
# you want to use. I've used a placeholder here.
macro_res <- RegParallel(
data = cibersort_results,
formula = formula,
FUN = function(formula, data)
lm(formula = formula, data = cibersort_results),
FUNtype = 'lm',
variables = colnames(data)[4],
blocksize = 1,
p.adjust = "BH",
cores = 2,
nestedParallel = FALSE,
conflevel = 95
)
# Store the results in the list
macro_res_list[[ctypes]] <- macro_res
}
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- VSMC_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- VSMC_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Pericyte ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Mesothelial ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Macrophage ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Fibroblast_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Fibroblast_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Endothelial_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Endothelial_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
lm_results_age_sex <- dplyr::bind_rows(macro_res_list, .id ="celltypes")
With this i could find only 1 cell type having a relationship and that is mesothelial (in sex in the model - not sure if it is showing any effect here)
fit_age_sex = lm(Mesothelial ~ SEX + exact_age, data=cibersort_results)
summary(fit_age_sex)
##
## Call:
## lm(formula = Mesothelial ~ SEX + exact_age, data = cibersort_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.001585 -0.001044 -0.000841 -0.000177 0.063800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.807e-03 1.114e-03 2.519 0.0121 *
## SEX -8.670e-04 4.508e-04 -1.923 0.0551 .
## exact_age -1.691e-05 1.686e-05 -1.003 0.3165
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004478 on 429 degrees of freedom
## Multiple R-squared: 0.01049, Adjusted R-squared: 0.005881
## F-statistic: 2.275 on 2 and 429 DF, p-value: 0.104
ggplot(data = cibersort_results, aes(x = exact_age, y = Mesothelial, group_by(AGE),color= AGE, fill=AGE)) +
#facet_wrap(~leadsnp, nrow=2) +
geom_jitter() +
geom_boxplot() +
# geom_smooth(method = "lm") +
geom_abline(slope = coef(fit_age_sex)[2], intercept = coef(fit_age_sex)[1], color = "red") +
theme_classic() +
labs(y= "Mesothelial", x = "Sex")
ggplot(data = cibersort_results, aes(x = SEX, y = Mesothelial, color = as.factor(SEX), fill = as.factor(SEX))) +
geom_boxplot() +
geom_jitter() + # You can use jitter after boxplot if needed
geom_abline(slope = coef(fit_age_sex)[2], intercept = coef(fit_age_sex)[1], color = "red") +
theme_classic() +
labs(y = "Mesothelial", x = "Sex")
cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("spearman"))
## [1] -0.09893546
cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("pearson"))
## [1] -0.09213446
cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("kendall"))
## [1] -0.06797777
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("spearman"))
## [1] 0.1887292
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("pearson"))
## [1] 0.1726686
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("kendall"))
## [1] 0.1269018
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("spearman"))
## [1] -0.09763152
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("pearson"))
## [1] -0.09041257
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("kendall"))
## [1] -0.09611186
##correlation of age with different cell types ##### #library(dplyr) #library(“Hmisc”) #library(“ggplot2”) #library(“ggpubr”) #library(“cowplot”) #library(corrplot) #library(dplyr)
#corr_data = cibersort_results %>% # select(“exact_age”, “VSMC_I”, “VSMC_II”, “Pericyte”, “Mesothelial”, “Macrophage”, “Fibroblast_II”, “Fibroblast_I”, “Endothelial_I”, “Endothelial_II”)
#res <- cor(corr_data, method = c(“pearson”)) #round(res,2)
#corr_plot <- corrplot(res, type = “upper”, order = “hclust”, tl.col = “black”, tl.srt = 45, addCoef.col = “black”, number.cex = 1.0)
#heatmap(x = res, symm = TRUE) #####
table(data$AGE)
##
## 20-29 30-39 40-49 50-59 60-69 70-79
## 37 38 68 149 131 9
barplot(table(data$AGE))
max(data$exact_age)
## [1] 70
min(data$exact_age)
## [1] 21
data_long <- data %>%
pivot_longer(cols = c("VSMC_II", "VSMC_I", "Endothelial_I", "Fibroblast_I", "Endothelial_II", "Pericyte", "Macrophage", "Fibroblast_II", "Mesothelial"), names_to = "Cell_Type", values_to = "Fraction")
##removing X_, Lymphatic_Endothelial, Lymphocyte, Neuron
p <- data_long %>%
group_by(AGE,Cell_Type) %>%
summarise(mf = mean(Fraction)) %>%
ungroup()%>%
ggplot(aes(y = AGE, x = mf, fill = Cell_Type)) +
geom_bar(stat = "identity") +
scale_fill_brewer(palette = "Spectral") +
labs(y = "Age", x = "Fraction of Cell Types", size = 20) +
theme_minimal() +
coord_flip()
## `summarise()` has grouped output by 'AGE'. You can override using the `.groups`
## argument.
p
p <- recordPlot()
png("cellular_composition.png", width = 6, height = 4, units = "in", res = 800, bg = "transparent")
replayPlot(p)
dev.off()
## png
## 2
#ggplotly(p)
##CHI-SQUARED TEST - to examine the difference between the categorical variables from a random sample in order to judge the goodness of bit between expected and observed.
##BOX-WHISKER PLOTS IN BETWEEN THE DIFFERENT CELL TYPES ##Total cell types = 12
library(metan) data\(exact_age <- as.numeric(data\)exact_age) corr_with_age = data %>% select(‘exact_age’, ‘VSMC_I’, ‘VSMC_II’, ‘Pericyte’, ‘Neuron’, ‘Mesothelial’, ‘Macrophage’, ‘Lymphocyte’, ‘Lymphatic_Endothelial’, ‘Fibroblast_II’, ‘Fibroblast_I’, ‘Endothelial_I’, ‘Endothelial_II’, ‘X_’)
all <- corr_coef( corr_with_age, use = c(“complete.obs”) )
all$cor
all\(cor[is.na(all\)cor)] <- 0
all$cor
#image(all, main = “Correlation Matrix Heatmap”) library(pheatmap)
pheatmap(all$cor, display_numbers = TRUE) ###