library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tibble)
library(cowplot)
library(metan)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## |=========================================================|
## | Multi-Environment Trial Analysis (metan) v1.18.0        |
## | Author: Tiago Olivoto                                   |
## | Type 'citation('metan')' to know how to cite metan      |
## | Type 'vignette('metan_start')' for a short tutorial     |
## | Visit 'https://bit.ly/pkgmetan' for a complete tutorial |
## |=========================================================|
## 
## Attaching package: 'metan'
## The following objects are masked from 'package:tibble':
## 
##     column_to_rownames, remove_rownames, rownames_to_column
## The following object is masked from 'package:tidyr':
## 
##     replace_na
## The following object is masked from 'package:dplyr':
## 
##     recode_factor
data <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv")
colnames(data)
##  [1] "Mixture"               "AGE"                   "Mid.age"              
##  [4] "exact_age"             "SEX"                   "DTHHRDY"              
##  [7] "VSMC_II"               "VSMC_I"                "Endothelial_I"        
## [10] "Fibroblast_I"          "Endothelial_II"        "Pericyte"             
## [13] "X_"                    "Macrophage"            "Lymphatic_Endothelial"
## [16] "Lymphocyte"            "Neuron"                "Fibroblast_II"        
## [19] "Mesothelial"           "P.value"               "Correlation"          
## [22] "RMSE"
cibersort_results <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv")
library(tidyr)

# Assuming your data frame is named 'data'
long_data <- data %>%
  pivot_longer(cols = matches('VSMC_I|VSMC_II|Pericyte|Neuron|Mesothelial|Macrophage|Lymphocyte|Lymphatic_Endothelial|Fibroblast_II|Fibroblast_I|Endothelial_I|Endothelial_II|X_'), 
               names_to = "Cell_Type", 
               values_to = "Cell_Count")

# Display the resulting long-format data
#print(long_data)

##linear modelling with age and sex separately

library(RegParallel)
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:metan':
## 
##     :=
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## Loading required package: stringr
## Loading required package: survival
## Loading required package: arm
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:metan':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: lme4
## 
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is /home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results
# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")

# Initialize a list to store the model results
macro_res_list <- list()

# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
  formula <- paste(ctypes, '~([*])')

  # Note: Replace 'as.numeric([*])' with the actual predictor variable
  # you want to use. I've used a placeholder here.

  macro_res <- RegParallel(
    data = cibersort_results,
    formula = formula,
    FUN = function(formula, data)
      lm(formula = formula, data = data),
    FUNtype = 'lm',
    variables = colnames(data)[4:5],
    blocksize = 2,
    p.adjust = "BH",
    cores = 2,
    nestedParallel = FALSE,
    conflevel = 95
  )

  # Store the results in the list
  macro_res_list[[ctypes]] <- macro_res
}
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_I
## First 5 formulae:
## -- VSMC_I ~ (exact_age)
## -- VSMC_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ (exact_age)
## -- VSMC_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Pericyte
## First 5 formulae:
## -- Pericyte ~ (exact_age)
## -- Pericyte ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Mesothelial
## First 5 formulae:
## -- Mesothelial ~ (exact_age)
## -- Mesothelial ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Macrophage
## First 5 formulae:
## -- Macrophage ~ (exact_age)
## -- Macrophage ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_II
## First 5 formulae:
## -- Fibroblast_II ~ (exact_age)
## -- Fibroblast_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_I
## First 5 formulae:
## -- Fibroblast_I ~ (exact_age)
## -- Fibroblast_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_I
## First 5 formulae:
## -- Endothelial_I ~ (exact_age)
## -- Endothelial_I ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 2
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_II
## First 5 formulae:
## -- Endothelial_II ~ (exact_age)
## -- Endothelial_II ~ (SEX)
## -- NULL
## -- NULL
## -- NULL
## Processing 2 formulae, batch 1 of 1
## -- index1: 1; index2: 2
## Done!
lm_results_age <- dplyr::bind_rows(macro_res_list, .id ="celltypes")

##i couldn’t find the relationship of any of cell type with sex but could find relationship with age.

library(NatParksPalettes)
fit = lm(VSMC_II ~ exact_age + SEX, data=cibersort_results)
summary(fit)
## 
## Call:
## lm(formula = VSMC_II ~ exact_age + SEX, data = cibersort_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.239222 -0.054839  0.000006  0.055041  0.307200 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.3978720  0.0203168  19.583  < 2e-16 ***
## exact_age   0.0011296  0.0003075   3.674 0.000269 ***
## SEX         0.0081592  0.0082200   0.993 0.321463    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08164 on 429 degrees of freedom
## Multiple R-squared:  0.03204,    Adjusted R-squared:  0.02752 
## F-statistic: 7.099 on 2 and 429 DF,  p-value: 0.0009261
ggplot(data = cibersort_results, aes(x = exact_age, y = VSMC_II, group_by(AGE), color= AGE, fill = AGE)) +
  #facet_wrap(~leadsnp, nrow=2) + 
  geom_jitter() +
 # geom_boxplot() +
 #geom_smooth(method = "lm") +
  geom_abline(slope =  coef(fit)[2], intercept = coef(fit)[1], color = "red") +
  theme_classic() +
    scale_fill_manual(values = natparks.pals("Yellowstone", 6)) +
   labs(y= "Vascular Smooth Muscle Cells II", x = "Age") 

fit2 = lm(VSMC_I ~ exact_age + SEX, data=cibersort_results)
summary(fit2)
## 
## Call:
## lm(formula = VSMC_I ~ exact_age + SEX, data = cibersort_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28374 -0.06546  0.00149  0.07770  0.35864 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.3219891  0.0278091  11.579   <2e-16 ***
## exact_age   -0.0008242  0.0004209  -1.958   0.0509 .  
## SEX         -0.0110466  0.0112513  -0.982   0.3268    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1117 on 429 degrees of freedom
## Multiple R-squared:  0.01071,    Adjusted R-squared:  0.0061 
## F-statistic: 2.323 on 2 and 429 DF,  p-value: 0.09926
ggplot(data = cibersort_results, aes(x = exact_age, y = VSMC_I)) +
  geom_jitter() +
  geom_abline(slope =  coef(fit2)[2], intercept = coef(fit2)[1], color = "red") +
  theme_classic() +
  scale_fill_manual(values = natparks.pals("Yellowstone", 6)) +
  labs(y = "Vascular Smooth Muscle Cells I", x = "Age")

vsmc_1 <- ggplot(data, aes(x = exact_age, y = VSMC_I)) +
  geom_point(color = "pink") +
  geom_smooth(method = "lm", color = "darkmagenta") +  # Add a linear regression line without confidence intervals
  labs(x = "Age",
       y = "Vascular Smooth Muscle cells I",
       size = 20) +
  theme_classic() +
  coord_cartesian(ylim = c(0,0.8))

# Assuming you already have 'p' as your ggplot object
vsmc_1
## `geom_smooth()` using formula = 'y ~ x'

ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/agevsVSMCI.png", vsmc_1, width = 5, height = 4, dpi = 800)
## `geom_smooth()` using formula = 'y ~ x'
vsmc_2 <- ggplot(data, aes(x = exact_age, y = VSMC_II)) +
  geom_point(color = "pink") +
  geom_smooth(method = "lm", color = "darkmagenta") +  # Add a linear regression line without confidence intervals
  labs(x = "Age",
       y = "Vascular Smooth Muscle cells II",
       size = 20) +
  theme_classic() +
  coord_cartesian(ylim = c(0,0.8))

vsmc_2
## `geom_smooth()` using formula = 'y ~ x'

ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/agevsVSMCII.png", vsmc_2, width = 5, height = 4, dpi = 800)
## `geom_smooth()` using formula = 'y ~ x'

##Linear modelling with both age and sex

# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")

# Initialize a list to store the model results
macro_res_list <- list()

# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
  formula <- paste(ctypes, '~exact_age + SEX')

  # Note: Replace 'as.numeric([*])' with the actual predictor variable
  # you want to use. I've used a placeholder here.

  macro_res <- RegParallel(
    data = cibersort_results,
    formula = formula,
    FUN = function(formula, data)
      lm(formula = formula, data = cibersort_results),
    FUNtype = 'lm',
    variables = colnames(data)[4],
    blocksize = 1,
    p.adjust = "BH",
    cores = 2,
    nestedParallel = FALSE,
    conflevel = 95
  )

  # Store the results in the list
  macro_res_list[[ctypes]] <- macro_res
}
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- VSMC_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- VSMC_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Pericyte ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Mesothelial ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Macrophage ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Fibroblast_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Fibroblast_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Endothelial_I ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 1
## Cores / Threads:
## -- 2
## Terms included in model:
## First 5 formulae:
## -- Endothelial_II ~ exact_age + SEX
## -- NULL
## -- NULL
## -- NULL
## -- NULL
## Processing 1 formulae, batch 1 of 1
## -- index1: 1; index2: 1
## Done!
lm_results_age_sex <- dplyr::bind_rows(macro_res_list, .id ="celltypes")

With this i could find only 1 cell type having a relationship and that is mesothelial (in sex in the model - not sure if it is showing any effect here)

fit_age_sex = lm(Mesothelial ~ SEX + exact_age, data=cibersort_results)
summary(fit_age_sex)
## 
## Call:
## lm(formula = Mesothelial ~ SEX + exact_age, data = cibersort_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.001585 -0.001044 -0.000841 -0.000177  0.063800 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  2.807e-03  1.114e-03   2.519   0.0121 *
## SEX         -8.670e-04  4.508e-04  -1.923   0.0551 .
## exact_age   -1.691e-05  1.686e-05  -1.003   0.3165  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004478 on 429 degrees of freedom
## Multiple R-squared:  0.01049,    Adjusted R-squared:  0.005881 
## F-statistic: 2.275 on 2 and 429 DF,  p-value: 0.104
ggplot(data = cibersort_results, aes(x = exact_age, y = Mesothelial, group_by(AGE),color= AGE, fill=AGE)) +
  #facet_wrap(~leadsnp, nrow=2) + 
  geom_jitter() +
  geom_boxplot() +
 # geom_smooth(method = "lm") +
  geom_abline(slope =  coef(fit_age_sex)[2], intercept = coef(fit_age_sex)[1], color = "red") +
  theme_classic() +
   labs(y= "Mesothelial", x = "Sex")

ggplot(data = cibersort_results, aes(x = SEX, y = Mesothelial, color = as.factor(SEX), fill = as.factor(SEX))) +
  geom_boxplot() +
  geom_jitter() +  # You can use jitter after boxplot if needed
  geom_abline(slope = coef(fit_age_sex)[2], intercept = coef(fit_age_sex)[1], color = "red") +
  theme_classic() +
  labs(y = "Mesothelial", x = "Sex")

cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("spearman"))
## [1] -0.09893546
cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("pearson"))
## [1] -0.09213446
cor(cibersort_results$exact_age, cibersort_results$VSMC_I, method = c("kendall"))
## [1] -0.06797777
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("spearman"))
## [1] 0.1887292
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("pearson"))
## [1] 0.1726686
cor(cibersort_results$exact_age, cibersort_results$VSMC_II, method = c("kendall"))
## [1] 0.1269018
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("spearman"))
## [1] -0.09763152
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("pearson"))
## [1] -0.09041257
cor(cibersort_results$SEX, cibersort_results$Mesothelial, method = c("kendall"))
## [1] -0.09611186

##correlation of age with different cell types ##### #library(dplyr) #library(“Hmisc”) #library(“ggplot2”) #library(“ggpubr”) #library(“cowplot”) #library(corrplot) #library(dplyr)

#corr_data = cibersort_results %>% # select(“exact_age”, “VSMC_I”, “VSMC_II”, “Pericyte”, “Mesothelial”, “Macrophage”, “Fibroblast_II”, “Fibroblast_I”, “Endothelial_I”, “Endothelial_II”)

#res <- cor(corr_data, method = c(“pearson”)) #round(res,2)

#corr_plot <- corrplot(res, type = “upper”, order = “hclust”, tl.col = “black”, tl.srt = 45, addCoef.col = “black”, number.cex = 1.0)

#heatmap(x = res, symm = TRUE) #####

table(data$AGE)
## 
## 20-29 30-39 40-49 50-59 60-69 70-79 
##    37    38    68   149   131     9
barplot(table(data$AGE))

max(data$exact_age)
## [1] 70
min(data$exact_age)
## [1] 21
data_long <- data %>%
  pivot_longer(cols = c("VSMC_II", "VSMC_I", "Endothelial_I", "Fibroblast_I", "Endothelial_II", "Pericyte", "Macrophage", "Fibroblast_II", "Mesothelial"), names_to = "Cell_Type", values_to = "Fraction")
##removing X_, Lymphatic_Endothelial, Lymphocyte, Neuron
p <- data_long %>%
  group_by(AGE,Cell_Type) %>%
  summarise(mf = mean(Fraction)) %>%
  ungroup()%>%
  ggplot(aes(y = AGE, x = mf, fill = Cell_Type)) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "Spectral") +
  labs(y = "Age", x = "Fraction of Cell Types", size = 20) +
  theme_minimal() +
  coord_flip()  
## `summarise()` has grouped output by 'AGE'. You can override using the `.groups`
## argument.
p

p <- recordPlot()
png("cellular_composition.png", width = 6, height = 4, units = "in", res = 800, bg = "transparent")
replayPlot(p)
dev.off()
## png 
##   2
#ggplotly(p)

##CHI-SQUARED TEST - to examine the difference between the categorical variables from a random sample in order to judge the goodness of bit between expected and observed.

##BOX-WHISKER PLOTS IN BETWEEN THE DIFFERENT CELL TYPES ##Total cell types = 12

library(metan) data\(exact_age <- as.numeric(data\)exact_age) corr_with_age = data %>% select(‘exact_age’, ‘VSMC_I’, ‘VSMC_II’, ‘Pericyte’, ‘Neuron’, ‘Mesothelial’, ‘Macrophage’, ‘Lymphocyte’, ‘Lymphatic_Endothelial’, ‘Fibroblast_II’, ‘Fibroblast_I’, ‘Endothelial_I’, ‘Endothelial_II’, ‘X_’)

all <- corr_coef( corr_with_age, use = c(“complete.obs”) )

all$cor

all\(cor[is.na(all\)cor)] <- 0

all$cor

#image(all, main = “Correlation Matrix Heatmap”) library(pheatmap)

pheatmap(all$cor, display_numbers = TRUE) ###