library(vcfR)
## 
##    *****       ***   vcfR   ***       *****
##    This is vcfR 1.15.0 
##      browseVignettes('vcfR') # Documentation
##      citation('vcfR') # Citation
##    *****       *****      *****       *****
library(ggplot2)
#vcf_aao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/GTEx_aao.vcf") ##335 variant count
#vcf_aao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/leadsnps_aao.vcf")
#vcf_dao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/GTEx_dao.vcf") ##114 variant count
vcf_aao <-  read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/cojo_aao.vcf")
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 3389
##   header_line: 3390
##   variant count: 9
##   column count: 847
## 
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 3389 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 9
##   Character matrix gt cols: 847
##   skip: 0
##   nrows: 9
##   row_num: 0
## 
Processed variant: 9
## All variants processed
cibersort_results <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv") ##

Relationship of distensibility with age –the following relationship is with the normalised phenotypes

##age and sex is present below:
pheno_dis <- read.table("/home/mchopra/Documents/PhD-Year1/PRS/pgs_covars2.txt", header = T)
aao_norm_dist <- read.table("/home/mchopra/Documents/PhD-Year1/GWAS/norm_pheno_aao.txt", header = T, sep = ",")
rownames(pheno_dis) <- pheno_dis[,1]
rownames(aao_norm_dist) <- aao_norm_dist[,1]

intersect <- intersect(rownames(pheno_dis), rownames(aao_norm_dist))

case1 <- pheno_dis[intersect, ] 
aao_norm_dist <- aao_norm_dist[intersect,]

x <- merge(case1, aao_norm_dist, by = "IID", all = TRUE)

fit = lm(AAo_distensibility ~ age + sex, data = x)
summary(fit)
## 
## Call:
## lm(formula = AAo_distensibility ~ age + sex, data = x)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3882 -0.4815  0.0109  0.4933  5.0717 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.3498913  0.0450322   96.59   <2e-16 ***
## age         -0.0649360  0.0006831  -95.06   <2e-16 ***
## sex                 NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7806 on 21916 degrees of freedom
##   (25429 observations deleted due to missingness)
## Multiple R-squared:  0.292,  Adjusted R-squared:  0.2919 
## F-statistic:  9037 on 1 and 21916 DF,  p-value: < 2.2e-16
ggplot(x, aes(x = age, y = AAo_distensibility)) +
  geom_point(color = "pink") +
  geom_smooth(method = "lm", color = "darkmagenta")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2694 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2694 rows containing missing values or values outside the scale range
## (`geom_point()`).

pheno_dis <- read.table("/home/mchopra/Documents/PhD-Year1/PRS/pgs_covars2.txt", header = T)
dao_norm_dist <- read.table("/home/mchopra/Documents/PhD-Year1/GWAS/norm_pheno_dao.txt", header = T, sep = ",")
rownames(pheno_dis) <- pheno_dis[,1]
rownames(dao_norm_dist) <- dao_norm_dist[,1]

intersect <- intersect(rownames(pheno_dis), rownames(dao_norm_dist))

case1 <- pheno_dis[intersect, ] 
dao_norm_dist <- dao_norm_dist[intersect,]

y <- merge(case1, dao_norm_dist, by = "IID", all = TRUE)

fit2 = lm(DAo_distensibility ~ age + sex, data = y)
summary(fit2)
## 
## Call:
## lm(formula = DAo_distensibility ~ age + sex, data = y)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7056 -0.5085  0.0129  0.5236  3.6841 
## 
## Coefficients: (1 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.445600   0.046102   96.43   <2e-16 ***
## age         -0.067323   0.000699  -96.31   <2e-16 ***
## sex                NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.796 on 21827 degrees of freedom
##   (25298 observations deleted due to missingness)
## Multiple R-squared:  0.2982, Adjusted R-squared:  0.2982 
## F-statistic:  9275 on 1 and 21827 DF,  p-value: < 2.2e-16
ggplot(y, aes(x = age, y = DAo_distensibility)) +
  geom_point(color = "pink") +
  geom_smooth(method = "lm", color = "darkmagenta")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2693 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2693 rows containing missing values or values outside the scale range
## (`geom_point()`).

##Analysis for Aortic Ascending aorta 
metadata <- vcf_aao@meta
fixed_data <- vcf_aao@fix
genotype_data <- vcf_aao@gt
combined_data <- cbind(fixed_data, genotype_data)

##now, see relationship of genotype with cellular proportion

# Change genotype encoding from 0/0,0/1,1/1 to 0,1,2
genotype_to_numeric <- function(par){
 par.out <- par
 par.out[which(par == "0|0")] <- 0
 par.out[which(par == "0|1")] <- 1
 par.out[which(par == "1|1")] <- 2
 par.out[which(par == "1|0")] <- 1 ##no difference in 1|0 or 0|1, by convention it is written as 0|1 
 #par.out <- as.numeric(par.out)
 #par.out[is.na(par.out)] <- -1
return(as.numeric(par.out))
}
combined_data[, -(1:9)] <- apply(combined_data[, -(1:9)], 2, genotype_to_numeric)
transposed_data <- t(combined_data)
write.csv(transposed_data, "genotype_GTExsamples.csv", row.names = TRUE)

genotype_gtex  <- read.csv("genotype_GTExsamples.csv")
new_colnames <- as.character(genotype_gtex[3, ])
colnames(genotype_gtex) <- new_colnames
colnames(genotype_gtex)[1] <- "Mixture"
genotype_gtex[1:10, 1:10]
##       Mixture              chr5_96269410             chr5_123214952
## 1       CHROM                       chr5                       chr5
## 2         POS                   96269410                  123214952
## 3          ID              chr5_96269410             chr5_123214952
## 4         REF                          C                          T
## 5         ALT                          T                          C
## 6        QUAL                       <NA>                       <NA>
## 7      FILTER                       PASS                       PASS
## 8        INFO AN=1676;AF=0.320406;AC=537 AN=1676;AF=0.233294;AC=391
## 9      FORMAT                         GT                         GT
## 10 GTEX-1117F                          1                          0
##                chr5_123425370              chr7_74016839
## 1                        chr5                       chr7
## 2                   123425370                   74016839
## 3              chr5_123425370              chr7_74016839
## 4                           G                          A
## 5                           C                          G
## 6                        <NA>                       <NA>
## 7                        PASS                       PASS
## 8  AN=1676;AF=0.198687;AC=333 AN=1676;AF=0.438544;AC=735
## 9                          GT                         GT
## 10                          0                          0
##                  chr8_74853712               chr8_75672651
## 1                         chr8                        chr8
## 2                     74853712                    75672651
## 3                chr8_74853712               chr8_75672651
## 4                            G                           G
## 5                            A                           A
## 6                         <NA>                        <NA>
## 7                         PASS                        PASS
## 8  AN=1676;AF=0.0871122;AC=146 AN=1676;AF=0.652148;AC=1093
## 9                           GT                          GT
## 10                           1                           1
##                chr8_121627730              chr10_29878825
## 1                        chr8                       chr10
## 2                   121627730                    29878825
## 3              chr8_121627730              chr10_29878825
## 4                           T                           G
## 5                           C                           A
## 6                        <NA>                        <NA>
## 7                        PASS                        PASS
## 8  AN=1676;AF=0.295943;AC=496 AN=1676;AF=0.615752;AC=1032
## 9                          GT                          GT
## 10                          1                           1
##                chr16_83029235
## 1                       chr16
## 2                    83029235
## 3              chr16_83029235
## 4                           G
## 5                           C
## 6                        <NA>
## 7                        PASS
## 8  AN=1676;AF=0.130072;AC=218
## 9                          GT
## 10                          0
genotype_gtex <- genotype_gtex[-c(1:9), ] ##removed the header of 9 lines here
genotype_gtex[1:10, 1:10]
##       Mixture chr5_96269410 chr5_123214952 chr5_123425370 chr7_74016839
## 10 GTEX-1117F             1              0              0             0
## 11 GTEX-111CU             0              2              1             1
## 12 GTEX-111FC             1              0              0             1
## 13 GTEX-111VG             0              1              1             0
## 14 GTEX-111YS             0              0              0             1
## 15 GTEX-1122O             0              1              0             1
## 16 GTEX-1128S             2              0              0             1
## 17 GTEX-113IC             0              1              1             1
## 18 GTEX-113JC             1              0              1             1
## 19 GTEX-117XS             0              0              1             2
##    chr8_74853712 chr8_75672651 chr8_121627730 chr10_29878825 chr16_83029235
## 10             1             1              1              1              0
## 11             0             1              0              1              0
## 12             0             2              1              1              0
## 13             1             0              1              1              0
## 14             0             2              0              1              1
## 15             0             2              0              1              0
## 16             0             2              1              2              0
## 17             0             1              1              2              0
## 18             0             2              1              2              0
## 19             0             2              1              0              0
cibersort_results[1:5, 1:15]
##      Mixture   AGE Mid.age exact_age SEX DTHHRDY   VSMC_II    VSMC_I
## 1 GTEX-111YS 60-69      65        62   1       0 0.4174776 0.3876466
## 2 GTEX-1122O 60-69      65        64   2       0 0.5353880 0.3373091
## 3 GTEX-1128S 60-69      65        66   2       2 0.5548107 0.2345628
## 4 GTEX-117XS 60-69      65        64   1       2 0.5014955 0.3923229
## 5 GTEX-117YW 50-59      55        58   1       3 0.6688973 0.1841796
##   Endothelial_I Fibroblast_I Endothelial_II Pericyte X_  Macrophage
## 1             0            0      0.1862120        0  0 0.008663851
## 2             0            0      0.1273029        0  0 0.000000000
## 3             0            0      0.1996633        0  0 0.010963268
## 4             0            0      0.1036002        0  0 0.002581379
## 5             0            0      0.1469231        0  0 0.000000000
##   Lymphatic_Endothelial
## 1                     0
## 2                     0
## 3                     0
## 4                     0
## 5                     0

##now add the cibersortx results in this sheet

combined_results <- merge(cibersort_results, genotype_gtex, by = "Mixture")

#making data in long format

library(tidyr)
#combined_results

#long_combined_results <- gather(combined_results, key = "Celltype", value = "composition", VSMC_I, VSMC_II, Pericyte, Neuron, Mesothelial, Macrophage, Lymphocyte, Lymphatic_Endothelial, Fibroblast_II, Fibroblast_I, Endothelial_I, Endothelial_II, X_)

#long_combined_results <- gather(combined_results, key = "leadsnps", value = "genotype", chr3_41850431, chr5_96269410, chr5_123214952, chr7_74016839, chr8_74868420, chr8_75672651, chr8_121627786, chr8_75672651, chr8_121627786, chr8_123594919, chr10_29794728, chr16_83029235 )

#long_combined_results

##for just one sample - t test ##not for me as t-test does the comparison of mean in two normally distributed groups

loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]

res <- unlist(lapply(loci, function(x){

t.test(combined_results$Macrophage[combined_results[[x]] == 2], combined_results$Macrophage[combined_results[[x]] != 2])$p.value}))
#anova = aov(combined_results$Macrophage ~ factor(combined_results$chr10_29794728))
#summary(anova)
#TukeyHSD(anova) # not useful

names(res) <- loci
res[order(res)]
##  chr8_75672651  chr7_74016839 chr16_83029235 chr5_123214952 chr8_121627730 
##   0.0004841716   0.4028182218   0.4225738593   0.4447695042   0.4505115584 
## chr5_123425370  chr5_96269410 chr10_29878825  chr8_74853712 
##   0.5555902626   0.5974868835   0.9521984722   0.9575837229
head(res[order(res)])
##  chr8_75672651  chr7_74016839 chr16_83029235 chr5_123214952 chr8_121627730 
##   0.0004841716   0.4028182218   0.4225738593   0.4447695042   0.4505115584 
## chr5_123425370 
##   0.5555902626
significant_loci <- names(res[res <= 0.05])
significant_results <- res[res <= 0.05]

# Display significant loci and their corresponding p-values
data.frame(Locus = significant_loci, P_Value = significant_results)
##                       Locus      P_Value
## chr8_75672651 chr8_75672651 0.0004841716

##for all together – t-test (cases -> 2 and not 2)

variables_of_interest <- c("VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
                            "Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
                            "Endothelial_I", "Endothelial_II", "X_")                                            
loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]

# Create an empty list to store results for each variable
results_list <- list()

# Iterate through each variable
for (variable in variables_of_interest) {
  # Perform t-test for the current variable
  res <- unlist(lapply(loci, function(x) {
    ttest_result <- t.test(
      combined_results[[variable]][combined_results[[x]] == 2],
      combined_results[[variable]][combined_results[[x]] != 2],
      na.action = na.omit  # Handle missing values
    )
    p_value <- ttest_result$p.value
    return(p_value)
  }))
  
  # Store the results in the list with the variable name as the key
  names(res) <- loci
  results_list[[variable]] <- res
}

# Display significant results for each variable
all_results <- data.frame()
for (variable in variables_of_interest) {
  significant_loci <- names(results_list[[variable]][results_list[[variable]] <= 0.05])
  significant_results <- results_list[[variable]][results_list[[variable]] <= 0.05]
  
  # Create a data frame for the current variable
  variable_results <- data.frame(
    Variable = rep(variable, length(significant_loci)),
    Locus = significant_loci,
    P_Value = significant_results
  )
  
  # Append the data frame to the overall results data frame
  all_results <- rbind(all_results, variable_results)
}

# Print the final data frame with all significant results
t_results <- print(all_results[complete.cases(all_results), ])
##                      Variable          Locus      P_Value
## chr8_121627730        VSMC_II chr8_121627730 0.0495830041
## chr16_83029235       Pericyte chr16_83029235 0.0395263077
## chr8_74853712     Mesothelial  chr8_74853712 0.0014798323
## chr8_75672651      Macrophage  chr8_75672651 0.0004841716
## chr5_123425370   Fibroblast_I chr5_123425370 0.0015940147
## chr8_1216277301  Fibroblast_I chr8_121627730 0.0027957622
## chr5_96269410   Endothelial_I  chr5_96269410 0.0303625695
## chr5_123214952  Endothelial_I chr5_123214952 0.0358451216
## chr5_1234253701 Endothelial_I chr5_123425370 0.0231940811
#write.csv(results, file = "significant_loci_cellular_level.csv")
#results
t_bonf <- p.adjust(t_results$P_Value, method = "bonferroni")
t_fdr <- p.adjust(t_results$P_Value, method = "fdr")
t_results$bonferroni <- t_bonf
t_results$fdr <- t_fdr
t_results
##                      Variable          Locus      P_Value  bonferroni
## chr8_121627730        VSMC_II chr8_121627730 0.0495830041 0.446247036
## chr16_83029235       Pericyte chr16_83029235 0.0395263077 0.355736770
## chr8_74853712     Mesothelial  chr8_74853712 0.0014798323 0.013318491
## chr8_75672651      Macrophage  chr8_75672651 0.0004841716 0.004357545
## chr5_123425370   Fibroblast_I chr5_123425370 0.0015940147 0.014346132
## chr8_1216277301  Fibroblast_I chr8_121627730 0.0027957622 0.025161860
## chr5_96269410   Endothelial_I  chr5_96269410 0.0303625695 0.273263126
## chr5_123214952  Endothelial_I chr5_123214952 0.0358451216 0.322606094
## chr5_1234253701 Endothelial_I chr5_123425370 0.0231940811 0.208746730
##                         fdr
## chr8_121627730  0.049583004
## chr16_83029235  0.044467096
## chr8_74853712   0.004782044
## chr8_75672651   0.004357545
## chr5_123425370  0.004782044
## chr8_1216277301 0.006290465
## chr5_96269410   0.044467096
## chr5_123214952  0.044467096
## chr5_1234253701 0.041749346

t test (o and not 0)

variables_of_interest <- c("VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
                            "Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
                            "Endothelial_I", "Endothelial_II", "X_")                                            
loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]

# Create an empty list to store results for each variable
results_list <- list()

# Iterate through each variable
for (variable in variables_of_interest) {
  # Perform t-test for the current variable
  res <- unlist(lapply(loci, function(x) {
    ttest_result <- t.test(
      combined_results[[variable]][combined_results[[x]] == 0],
      combined_results[[variable]][combined_results[[x]] != 0],
      na.action = na.omit  # Handle missing values
    )
    p_value <- ttest_result$p.value
    return(p_value)
  }))
  
  # Store the results in the list with the variable name as the key
  names(res) <- loci
  results_list[[variable]] <- res
}

# Display significant results for each variable
all_results <- data.frame()
for (variable in variables_of_interest) {
  significant_loci <- names(results_list[[variable]][results_list[[variable]] <= 0.05])
  significant_results <- results_list[[variable]][results_list[[variable]] <= 0.05]
  
  # Create a data frame for the current variable
  variable_results <- data.frame(
    Variable = rep(variable, length(significant_loci)),
    Locus = significant_loci,
    P_Value = significant_results
  )
  
  # Append the data frame to the overall results data frame
  all_results <- rbind(all_results, variable_results)
}

# Print the final data frame with all significant results
t_results_0 <- print(all_results[complete.cases(all_results), ])
##                       Variable          Locus     P_Value
## chr16_83029235         VSMC_II chr16_83029235 0.034561848
## chr5_96269410         Pericyte  chr5_96269410 0.046309459
## chr16_830292351       Pericyte chr16_83029235 0.015583055
## chr8_75672651      Mesothelial  chr8_75672651 0.001468874
## chr10_29878825     Mesothelial chr10_29878825 0.010886859
## chr7_74016839     Fibroblast_I  chr7_74016839 0.016523273
## chr8_756726511    Fibroblast_I  chr8_75672651 0.005104568
## chr10_298788251   Fibroblast_I chr10_29878825 0.001585158
## chr5_962694101  Endothelial_II  chr5_96269410 0.016460684
#write.csv(results, file = "significant_loci_cellular_level.csv")
#results
t_bonf <- p.adjust(t_results_0$P_Value, method = "bonferroni")
t_fdr <- p.adjust(t_results_0$P_Value, method = "fdr")
t_results_0$bonferroni <- t_bonf
t_results_0$fdr <- t_fdr
t_results_0
##                       Variable          Locus     P_Value bonferroni
## chr16_83029235         VSMC_II chr16_83029235 0.034561848 0.31105663
## chr5_96269410         Pericyte  chr5_96269410 0.046309459 0.41678513
## chr16_830292351       Pericyte chr16_83029235 0.015583055 0.14024749
## chr8_75672651      Mesothelial  chr8_75672651 0.001468874 0.01321986
## chr10_29878825     Mesothelial chr10_29878825 0.010886859 0.09798173
## chr7_74016839     Fibroblast_I  chr7_74016839 0.016523273 0.14870946
## chr8_756726511    Fibroblast_I  chr8_75672651 0.005104568 0.04594112
## chr10_298788251   Fibroblast_I chr10_29878825 0.001585158 0.01426642
## chr5_962694101  Endothelial_II  chr5_96269410 0.016460684 0.14814616
##                         fdr
## chr16_83029235  0.038882079
## chr5_96269410   0.046309459
## chr16_830292351 0.021244208
## chr8_75672651   0.007133211
## chr10_29878825  0.021244208
## chr7_74016839   0.021244208
## chr8_756726511  0.015313705
## chr10_298788251 0.007133211
## chr5_962694101  0.021244208

##LOOKS SUSPICIOUS!!! ##TRYING LINEAR MODELLING NOW

library(RegParallel)
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: data.table
## Loading required package: stringr
## Loading required package: survival
## Loading required package: arm
## Loading required package: MASS
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loading required package: lme4
## 
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is /home/mchopra/Documents/PhD-Year1/GTEx
#celltypes of interest - VSMC_I, VSMC_II, Endothelial_I, Endothelial_II, Fibroblast_I, Fibroblast_II, Pericyte, Macrophage, Mesothelial only. Not including Lymphatic_Endothelial, Lymphocyte and Neuron as i got no cellular proportion of them in the GTEx samples.   
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")   

#fit <- lm(VSMC_II ~ as.numeric(chr3_41850431), data=combined_results)
#summary(fit)
#as.numeric because of additive genetic model 

#fc = as.factor(combined_results$chr3_41850431)
#ggpubr::compare_means(VSMC_II ~ chr3_41850431, data=combined_results, ref.group = "0", method = "t.test")

#anova = aov(combined_results$VSMC_II ~ as.factor(combined_results$chr3_41850431))
#TukeyHSD(anova)

macro_res1 <- RegParallel(
  data = combined_results,
  formula = 'VSMC_II ~ as.numeric([*])',
  FUN = function(formula, data)
    lm(formula = formula,
          data = data),
  FUNtype = 'lm',
  variables = colnames(combined_results)[23:31],
  blocksize = 9,
  p.adjust = "BH",
  cores = 2,
  nestedParallel = FALSE,
  conflevel = 95)
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ as.numeric(chr5_96269410)
## -- VSMC_II ~ as.numeric(chr5_123214952)
## -- VSMC_II ~ as.numeric(chr5_123425370)
## -- VSMC_II ~ as.numeric(chr7_74016839)
## -- VSMC_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")

# Initialize a list to store the model results
macro_res_list <- list()

# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
  formula <- paste(ctypes, '~ as.numeric([*])')

  # Note: Replace 'as.numeric([*])' with the actual predictor variable
  # you want to use. I've used a placeholder here.

  macro_res <- RegParallel(
    data = combined_results,
    formula = formula,
    FUN = function(formula, data)
      lm(formula = formula, data = data),
    FUNtype = 'lm',
    variables = colnames(combined_results)[23:31],
    blocksize = 9,
    p.adjust = "BH",
    cores = 2,
    nestedParallel = FALSE,
    conflevel = 95
  )

  # Store the results in the list
  macro_res_list[[ctypes]] <- macro_res
}
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_I
## First 5 formulae:
## -- VSMC_I ~ as.numeric(chr5_96269410)
## -- VSMC_I ~ as.numeric(chr5_123214952)
## -- VSMC_I ~ as.numeric(chr5_123425370)
## -- VSMC_I ~ as.numeric(chr7_74016839)
## -- VSMC_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ as.numeric(chr5_96269410)
## -- VSMC_II ~ as.numeric(chr5_123214952)
## -- VSMC_II ~ as.numeric(chr5_123425370)
## -- VSMC_II ~ as.numeric(chr7_74016839)
## -- VSMC_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Pericyte
## First 5 formulae:
## -- Pericyte ~ as.numeric(chr5_96269410)
## -- Pericyte ~ as.numeric(chr5_123214952)
## -- Pericyte ~ as.numeric(chr5_123425370)
## -- Pericyte ~ as.numeric(chr7_74016839)
## -- Pericyte ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Mesothelial
## First 5 formulae:
## -- Mesothelial ~ as.numeric(chr5_96269410)
## -- Mesothelial ~ as.numeric(chr5_123214952)
## -- Mesothelial ~ as.numeric(chr5_123425370)
## -- Mesothelial ~ as.numeric(chr7_74016839)
## -- Mesothelial ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Macrophage
## First 5 formulae:
## -- Macrophage ~ as.numeric(chr5_96269410)
## -- Macrophage ~ as.numeric(chr5_123214952)
## -- Macrophage ~ as.numeric(chr5_123425370)
## -- Macrophage ~ as.numeric(chr7_74016839)
## -- Macrophage ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_II
## First 5 formulae:
## -- Fibroblast_II ~ as.numeric(chr5_96269410)
## -- Fibroblast_II ~ as.numeric(chr5_123214952)
## -- Fibroblast_II ~ as.numeric(chr5_123425370)
## -- Fibroblast_II ~ as.numeric(chr7_74016839)
## -- Fibroblast_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_I
## First 5 formulae:
## -- Fibroblast_I ~ as.numeric(chr5_96269410)
## -- Fibroblast_I ~ as.numeric(chr5_123214952)
## -- Fibroblast_I ~ as.numeric(chr5_123425370)
## -- Fibroblast_I ~ as.numeric(chr7_74016839)
## -- Fibroblast_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_I
## First 5 formulae:
## -- Endothelial_I ~ as.numeric(chr5_96269410)
## -- Endothelial_I ~ as.numeric(chr5_123214952)
## -- Endothelial_I ~ as.numeric(chr5_123425370)
## -- Endothelial_I ~ as.numeric(chr7_74016839)
## -- Endothelial_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
## 
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_II
## First 5 formulae:
## -- Endothelial_II ~ as.numeric(chr5_96269410)
## -- Endothelial_II ~ as.numeric(chr5_123214952)
## -- Endothelial_II ~ as.numeric(chr5_123425370)
## -- Endothelial_II ~ as.numeric(chr7_74016839)
## -- Endothelial_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
lm_results <- dplyr::bind_rows(macro_res_list, .id ="celltypes")

##checking using plots!!

lm and glm(for non-normal distribution data) does give the same results ##not anything significant

library(NatParksPalettes)
library(ggplot2)
fit <- lm((Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
summary(fit)
## 
## Call:
## lm(formula = (Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.007419 -0.000434 -0.000434 -0.000434  0.119312 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               0.0004344  0.0004504   0.964  0.33550   
## as.numeric(chr8_74853712) 0.0034924  0.0010826   3.226  0.00136 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008245 on 385 degrees of freedom
## Multiple R-squared:  0.02632,    Adjusted R-squared:  0.02379 
## F-statistic: 10.41 on 1 and 385 DF,  p-value: 0.001362
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_I, group = chr8_74853712, color = chr8_74853712)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit)[2], intercept = coef(fit)[1], color = "purple") +
  theme_classic()  

fit2 = lm(Mesothelial ~ as.numeric(chr8_121627730), data=combined_results)
summary(fit2)
## 
## Call:
## lm(formula = Mesothelial ~ as.numeric(chr8_121627730), data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.002222 -0.001199 -0.000176 -0.000176  0.062672 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                0.0001756  0.0003206   0.548   0.5842   
## as.numeric(chr8_121627730) 0.0010233  0.0003875   2.641   0.0086 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004602 on 385 degrees of freedom
## Multiple R-squared:  0.01779,    Adjusted R-squared:  0.01524 
## F-statistic: 6.975 on 1 and 385 DF,  p-value: 0.008603
ggplot(data = combined_results, aes(x = chr8_121627730, y = Mesothelial, group = chr8_121627730)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit2)[2], intercept = coef(fit2)[1], color = "purple") +
  theme_classic()

fit3 = lm(Fibroblast_I ~ as.numeric(chr7_74016839), data=combined_results)
summary(fit3)
## 
## Call:
## lm(formula = Fibroblast_I ~ as.numeric(chr7_74016839), data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.003912 -0.001940 -0.001940  0.000032  0.130288 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)               -3.201e-05  9.214e-04  -0.035   0.9723  
## as.numeric(chr7_74016839)  1.972e-03  7.980e-04   2.471   0.0139 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01096 on 385 degrees of freedom
## Multiple R-squared:  0.01562,    Adjusted R-squared:  0.01306 
## F-statistic: 6.108 on 1 and 385 DF,  p-value: 0.01389
ggplot(data = combined_results, aes(x = chr7_74016839, y = Fibroblast_I, group = chr7_74016839)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit3)[2], intercept = coef(fit3)[1], color = "purple") +
  theme_classic()

fit4 = lm(Endothelial_II ~ as.numeric(chr8_74853712) + exact_age + SEX, data=combined_results)
summary(fit4)
## 
## Call:
## lm(formula = Endothelial_II ~ as.numeric(chr8_74853712) + exact_age + 
##     SEX, data = combined_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16743 -0.05450 -0.01126  0.04884  0.27393 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.2370118  0.0200085  11.846   <2e-16 ***
## as.numeric(chr8_74853712)  0.0246095  0.0100597   2.446   0.0149 *  
## exact_age                 -0.0001514  0.0003037  -0.498   0.6184    
## SEX                       -0.0036256  0.0081351  -0.446   0.6561    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07655 on 383 degrees of freedom
## Multiple R-squared:  0.01617,    Adjusted R-squared:  0.008463 
## F-statistic: 2.098 on 3 and 383 DF,  p-value: 0.09999
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_II, group = chr8_74853712, color = chr8_74853712, fill = chr8_74853712)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit4)[2], intercept = coef(fit4)[1], color = "purple") +
  theme_classic() +
  scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
  labs(y= "Endothelial_II", x = "PI15/RP11-6I2.3") + guides(color=FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

fit5 = lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age + SEX , data=combined_results)
summary(fit5)
## 
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age + 
##     SEX, data = combined_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29131 -0.06599  0.00219  0.07719  0.35332 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.3568465  0.0317453  11.241   <2e-16 ***
## as.numeric(chr10_29878825) -0.0193452  0.0084722  -2.283   0.0230 *  
## exact_age                  -0.0008967  0.0004441  -2.019   0.0441 *  
## SEX                        -0.0165990  0.0119005  -1.395   0.1639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1119 on 383 degrees of freedom
## Multiple R-squared:  0.02625,    Adjusted R-squared:  0.01862 
## F-statistic: 3.442 on 3 and 383 DF,  p-value: 0.01692
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, color = chr10_29878825, fill = chr10_29878825)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
  theme_classic() +
  scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
  labs(y= "Vascular Smooth Muscle Cells I", x = "SVIL genotype") + guides(color=FALSE)

fit6 = lm(Pericyte ~ as.numeric(chr5_96269410), data=combined_results)
summary(fit6)
## 
## Call:
## lm(formula = Pericyte ~ as.numeric(chr5_96269410), data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.002828 -0.002828 -0.001639 -0.001639  0.085631 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.0028283  0.0005960   4.746 2.94e-06 ***
## as.numeric(chr5_96269410) -0.0011897  0.0006087  -1.954   0.0514 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.00844 on 385 degrees of freedom
## Multiple R-squared:  0.009824,   Adjusted R-squared:  0.007252 
## F-statistic:  3.82 on 1 and 385 DF,  p-value: 0.05138
ggplot(data = combined_results, aes(x = chr5_96269410, y = Pericyte, group = chr5_96269410)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit6)[2], intercept = coef(fit6)[1], color = "purple") +
  theme_classic()

fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age + SEX , data=combined_results)
summary(fit7)
## 
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age + 
##     SEX, data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.239136 -0.052667  0.002132  0.054667  0.306668 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.3716536  0.0229092  16.223  < 2e-16 ***
## as.numeric(chr10_29878825) 0.0134282  0.0061140   2.196 0.028670 *  
## exact_age                  0.0012160  0.0003205   3.794 0.000172 ***
## SEX                        0.0124025  0.0085881   1.444 0.149515    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08073 on 383 degrees of freedom
## Multiple R-squared:  0.04938,    Adjusted R-squared:  0.04193 
## F-statistic: 6.631 on 3 and 383 DF,  p-value: 0.0002238
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, color = chr10_29878825, fill = chr10_29878825)) +
  geom_jitter() +
  geom_boxplot() +
  geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple", size = 1) +
  theme_classic() +
  scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
  labs(y= "Vascular Smooth Muscle Cells II", x = "SVIL genotype") +
  guides(fill = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#p <- grid.arrange(b, c, a, ncol = 3, widths = c(6,6,6))

#p

#ggsave("/home/mchopra/Documents/PhD-Year1/GWAS/genotype_cellularcomposition.png", p, width = 20, height = 6, dpi = 800)
fit4 = lm(Endothelial_II ~ as.numeric(chr8_74853712), data=combined_results)
summary(fit4)
## 
## Call:
## lm(formula = Endothelial_II ~ as.numeric(chr8_74853712), data = combined_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16801 -0.05718 -0.01184  0.04786  0.26897 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               0.224279   0.004174  53.735   <2e-16 ***
## as.numeric(chr8_74853712) 0.024345   0.010031   2.427   0.0157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0764 on 385 degrees of freedom
## Multiple R-squared:  0.01507,    Adjusted R-squared:  0.01251 
## F-statistic:  5.89 on 1 and 385 DF,  p-value: 0.01568
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_II, group = chr8_74853712, fill = chr8_74853712)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
  geom_abline(slope = coef(fit4)[2], intercept = coef(fit4)[1], color = "purple") +
  theme_minimal() +
  labs(y= "Endothelial_II", x = "PI15/RP11-6I2.3", ylim(0,0.8)) + 
  guides(fill=FALSE) +
  coord_cartesian(ylim = c(0,0.8))

######

fit5 = lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit5)
## 
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age, 
##     data = combined_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28471 -0.06292  0.00091  0.07966  0.35906 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.3320838  0.0263492  12.603   <2e-16 ***
## as.numeric(chr10_29878825) -0.0187054  0.0084702  -2.208   0.0278 *  
## exact_age                  -0.0008688  0.0004442  -1.956   0.0512 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.112 on 384 degrees of freedom
## Multiple R-squared:  0.02131,    Adjusted R-squared:  0.01621 
## F-statistic:  4.18 on 2 and 384 DF,  p-value: 0.01601
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, fill = chr10_29878825)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
  geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
  theme_minimal() +
  labs(y= "Vascular Smooth Muscle Cells I", x = "SVIL") + 
  guides(fill=FALSE) +
  coord_cartesian(ylim = c(0,0.8))  +
   theme(legend.text = element_text(size = 20))

#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc1VSgenotype.png", y)
###### 

fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit7)
## 
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age, 
##     data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.240488 -0.051188  0.000293  0.054271  0.314618 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.3901558  0.0190186  20.514  < 2e-16 ***
## as.numeric(chr10_29878825) 0.0129501  0.0061137   2.118 0.034798 *  
## exact_age                  0.0011951  0.0003206   3.728 0.000222 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08085 on 384 degrees of freedom
## Multiple R-squared:  0.0442, Adjusted R-squared:  0.03922 
## F-statistic: 8.879 on 2 and 384 DF,  p-value: 0.0001699
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, fill = chr10_29878825)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
 # geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple", size = 1) +
  theme_minimal() +
  labs(y= "Vascular Smooth Muscle Cells II", x = "SVIL") +
  guides(fill = FALSE) +
  coord_cartesian(ylim = c(0,0.8)) +
   theme(legend.text = element_text(size = 20))

####

library(gridExtra)
#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc2VSgenotype.png", z)
#plot <- grid.arrange(y, z, ncol = 2, widths = c(8,8))

#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/genotype_cellularcomposition_final.png", plot, width = 14, height = 8, dpi = 800)
fit5 <- lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age, data = combined_results)
summary(fit5)
## 
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age, 
##     data = combined_results)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28471 -0.06292  0.00091  0.07966  0.35906 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.3320838  0.0263492  12.603   <2e-16 ***
## as.numeric(chr10_29878825) -0.0187054  0.0084702  -2.208   0.0278 *  
## exact_age                  -0.0008688  0.0004442  -1.956   0.0512 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.112 on 384 degrees of freedom
## Multiple R-squared:  0.02131,    Adjusted R-squared:  0.01621 
## F-statistic:  4.18 on 2 and 384 DF,  p-value: 0.01601
y <- ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, fill = chr10_29878825)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
#  geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
  geom_hline(yintercept = median(combined_results$VSMC_I), linetype = "dashed", color = "blue") +
  theme_minimal() +
  labs(y = "Vascular Smooth Muscle Cells I", x = "SVIL", size = 20) +
  guides(fill = FALSE) +
  coord_cartesian(ylim = c(0, 0.8)) +
  theme(legend.text = element_text(size = 20))
y

fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit7)
## 
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age, 
##     data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.240488 -0.051188  0.000293  0.054271  0.314618 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.3901558  0.0190186  20.514  < 2e-16 ***
## as.numeric(chr10_29878825) 0.0129501  0.0061137   2.118 0.034798 *  
## exact_age                  0.0011951  0.0003206   3.728 0.000222 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08085 on 384 degrees of freedom
## Multiple R-squared:  0.0442, Adjusted R-squared:  0.03922 
## F-statistic: 8.879 on 2 and 384 DF,  p-value: 0.0001699
Z <- ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, fill = chr10_29878825)) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
#  geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple") +
  geom_hline(yintercept = median(combined_results$VSMC_II), linetype = "dashed", color = "blue") +
  theme_minimal() +
  labs(y = "Vascular Smooth Muscle Cells II", x = "SVIL", size = 20) +
  guides(fill = FALSE) +
  coord_cartesian(ylim = c(0, 0.8)) +
  theme(legend.text = element_text(size = 20))
Z

#plot <- grid.arrange(y, Z, ncol = 2, widths = c(8,8))

ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc1vsSVIL.png", y , width = 5, height = 4, dpi = 800)
ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc2vsSVIL.png", Z , width = 5, height = 4, dpi = 800)

##WORTH NOTICING Endothelial_II, VSMC_II

##maybe and maybe not useful!!

# Load the necessary libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(broom)

# Your chromosome data
chromosomes <- c("chr3_41850431", "chr5_96269410", "chr5_123214952", "chr7_74016839", "chr8_74868420", "chr8_75672651", "chr8_121627786", "chr8_123594919", "chr10_29794728", "chr16_83029235")
#colnames(results_table)[1] <- "leadsnps"

#a <- merge(long_combined_results, results_table, all.y = TRUE, by = "leadsnps")

#rename = rep(c("Intercept", "Slope"), c(2), length.out=7740)
#a$term = rename
#library(lme4)
#fit = lm(Macrophage ~ as.numeric(chr3_41850431) + as.numeric(chr5_96269410) + as.numeric(chr5_123214952) + as.numeric(chr7_74016839) + as.numeric(chr8_74868420) + as.numeric(chr8_75672651) + as.numeric(chr8_121627786) + as.numeric(chr8_123594919) + as.numeric(chr10_29794728) + as.numeric(chr16_83029235), data=combined_results)
summary(fit)
## 
## Call:
## lm(formula = (Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.007419 -0.000434 -0.000434 -0.000434  0.119312 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)   
## (Intercept)               0.0004344  0.0004504   0.964  0.33550   
## as.numeric(chr8_74853712) 0.0034924  0.0010826   3.226  0.00136 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008245 on 385 degrees of freedom
## Multiple R-squared:  0.02632,    Adjusted R-squared:  0.02379 
## F-statistic: 10.41 on 1 and 385 DF,  p-value: 0.001362
fit$coefficients
##               (Intercept) as.numeric(chr8_74853712) 
##              0.0004343582              0.0034923832
# this line returns predictions by the model (on the same data) - controls for the effect of Sex 
#combined_results$predict_Macrophage = predict(fit, newdata = combined_results)

library(ggpubr)

# this is a simpler plot, it plots AGE and VSMC_I and comutes the correlation R + pvalue
# but it does not control for sex - we are plotting raw data


#ggplot(data = a, aes(x = genotype, y = Macrophage, group_by(leadsnps), color = genotype)) +
  #facet_wrap(~leadsnps, nrow=2) + 
# geom_jitter() +
  #geom_boxplot() +
 #geom_smooth(method = "lm") +
#  geom_abline(slope =  coef(fit)[2], intercept = coef(fit)[1], color = "red") +
  #theme_classic()