library(vcfR)
##
## ***** *** vcfR *** *****
## This is vcfR 1.15.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
library(ggplot2)
#vcf_aao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/GTEx_aao.vcf") ##335 variant count
#vcf_aao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/leadsnps_aao.vcf")
#vcf_dao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/GTEx_dao.vcf") ##114 variant count
vcf_aao <- read.vcfR("/home/mchopra/Documents/PhD-Year1/GTEx/cojo_aao.vcf")
## Scanning file to determine attributes.
## File attributes:
## meta lines: 3389
## header_line: 3390
## variant count: 9
## column count: 847
##
Meta line 1000 read in.
Meta line 2000 read in.
Meta line 3000 read in.
Meta line 3389 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
## Character matrix gt rows: 9
## Character matrix gt cols: 847
## skip: 0
## nrows: 9
## row_num: 0
##
Processed variant: 9
## All variants processed
cibersort_results <- read.csv("/home/mchopra/Documents/PhD-Year1/deconvolution/Deconvolution_results/results/CIBERSORTx_Results.csv") ##
Relationship of distensibility with age –the following relationship is with the normalised phenotypes
##age and sex is present below:
pheno_dis <- read.table("/home/mchopra/Documents/PhD-Year1/PRS/pgs_covars2.txt", header = T)
aao_norm_dist <- read.table("/home/mchopra/Documents/PhD-Year1/GWAS/norm_pheno_aao.txt", header = T, sep = ",")
rownames(pheno_dis) <- pheno_dis[,1]
rownames(aao_norm_dist) <- aao_norm_dist[,1]
intersect <- intersect(rownames(pheno_dis), rownames(aao_norm_dist))
case1 <- pheno_dis[intersect, ]
aao_norm_dist <- aao_norm_dist[intersect,]
x <- merge(case1, aao_norm_dist, by = "IID", all = TRUE)
fit = lm(AAo_distensibility ~ age + sex, data = x)
summary(fit)
##
## Call:
## lm(formula = AAo_distensibility ~ age + sex, data = x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3882 -0.4815 0.0109 0.4933 5.0717
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3498913 0.0450322 96.59 <2e-16 ***
## age -0.0649360 0.0006831 -95.06 <2e-16 ***
## sex NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7806 on 21916 degrees of freedom
## (25429 observations deleted due to missingness)
## Multiple R-squared: 0.292, Adjusted R-squared: 0.2919
## F-statistic: 9037 on 1 and 21916 DF, p-value: < 2.2e-16
ggplot(x, aes(x = age, y = AAo_distensibility)) +
geom_point(color = "pink") +
geom_smooth(method = "lm", color = "darkmagenta")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2694 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2694 rows containing missing values or values outside the scale range
## (`geom_point()`).
pheno_dis <- read.table("/home/mchopra/Documents/PhD-Year1/PRS/pgs_covars2.txt", header = T)
dao_norm_dist <- read.table("/home/mchopra/Documents/PhD-Year1/GWAS/norm_pheno_dao.txt", header = T, sep = ",")
rownames(pheno_dis) <- pheno_dis[,1]
rownames(dao_norm_dist) <- dao_norm_dist[,1]
intersect <- intersect(rownames(pheno_dis), rownames(dao_norm_dist))
case1 <- pheno_dis[intersect, ]
dao_norm_dist <- dao_norm_dist[intersect,]
y <- merge(case1, dao_norm_dist, by = "IID", all = TRUE)
fit2 = lm(DAo_distensibility ~ age + sex, data = y)
summary(fit2)
##
## Call:
## lm(formula = DAo_distensibility ~ age + sex, data = y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7056 -0.5085 0.0129 0.5236 3.6841
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.445600 0.046102 96.43 <2e-16 ***
## age -0.067323 0.000699 -96.31 <2e-16 ***
## sex NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.796 on 21827 degrees of freedom
## (25298 observations deleted due to missingness)
## Multiple R-squared: 0.2982, Adjusted R-squared: 0.2982
## F-statistic: 9275 on 1 and 21827 DF, p-value: < 2.2e-16
ggplot(y, aes(x = age, y = DAo_distensibility)) +
geom_point(color = "pink") +
geom_smooth(method = "lm", color = "darkmagenta")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2693 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2693 rows containing missing values or values outside the scale range
## (`geom_point()`).
##Analysis for Aortic Ascending aorta
metadata <- vcf_aao@meta
fixed_data <- vcf_aao@fix
genotype_data <- vcf_aao@gt
combined_data <- cbind(fixed_data, genotype_data)
##now, see relationship of genotype with cellular proportion
# Change genotype encoding from 0/0,0/1,1/1 to 0,1,2
genotype_to_numeric <- function(par){
par.out <- par
par.out[which(par == "0|0")] <- 0
par.out[which(par == "0|1")] <- 1
par.out[which(par == "1|1")] <- 2
par.out[which(par == "1|0")] <- 1 ##no difference in 1|0 or 0|1, by convention it is written as 0|1
#par.out <- as.numeric(par.out)
#par.out[is.na(par.out)] <- -1
return(as.numeric(par.out))
}
combined_data[, -(1:9)] <- apply(combined_data[, -(1:9)], 2, genotype_to_numeric)
transposed_data <- t(combined_data)
write.csv(transposed_data, "genotype_GTExsamples.csv", row.names = TRUE)
genotype_gtex <- read.csv("genotype_GTExsamples.csv")
new_colnames <- as.character(genotype_gtex[3, ])
colnames(genotype_gtex) <- new_colnames
colnames(genotype_gtex)[1] <- "Mixture"
genotype_gtex[1:10, 1:10]
## Mixture chr5_96269410 chr5_123214952
## 1 CHROM chr5 chr5
## 2 POS 96269410 123214952
## 3 ID chr5_96269410 chr5_123214952
## 4 REF C T
## 5 ALT T C
## 6 QUAL <NA> <NA>
## 7 FILTER PASS PASS
## 8 INFO AN=1676;AF=0.320406;AC=537 AN=1676;AF=0.233294;AC=391
## 9 FORMAT GT GT
## 10 GTEX-1117F 1 0
## chr5_123425370 chr7_74016839
## 1 chr5 chr7
## 2 123425370 74016839
## 3 chr5_123425370 chr7_74016839
## 4 G A
## 5 C G
## 6 <NA> <NA>
## 7 PASS PASS
## 8 AN=1676;AF=0.198687;AC=333 AN=1676;AF=0.438544;AC=735
## 9 GT GT
## 10 0 0
## chr8_74853712 chr8_75672651
## 1 chr8 chr8
## 2 74853712 75672651
## 3 chr8_74853712 chr8_75672651
## 4 G G
## 5 A A
## 6 <NA> <NA>
## 7 PASS PASS
## 8 AN=1676;AF=0.0871122;AC=146 AN=1676;AF=0.652148;AC=1093
## 9 GT GT
## 10 1 1
## chr8_121627730 chr10_29878825
## 1 chr8 chr10
## 2 121627730 29878825
## 3 chr8_121627730 chr10_29878825
## 4 T G
## 5 C A
## 6 <NA> <NA>
## 7 PASS PASS
## 8 AN=1676;AF=0.295943;AC=496 AN=1676;AF=0.615752;AC=1032
## 9 GT GT
## 10 1 1
## chr16_83029235
## 1 chr16
## 2 83029235
## 3 chr16_83029235
## 4 G
## 5 C
## 6 <NA>
## 7 PASS
## 8 AN=1676;AF=0.130072;AC=218
## 9 GT
## 10 0
genotype_gtex <- genotype_gtex[-c(1:9), ] ##removed the header of 9 lines here
genotype_gtex[1:10, 1:10]
## Mixture chr5_96269410 chr5_123214952 chr5_123425370 chr7_74016839
## 10 GTEX-1117F 1 0 0 0
## 11 GTEX-111CU 0 2 1 1
## 12 GTEX-111FC 1 0 0 1
## 13 GTEX-111VG 0 1 1 0
## 14 GTEX-111YS 0 0 0 1
## 15 GTEX-1122O 0 1 0 1
## 16 GTEX-1128S 2 0 0 1
## 17 GTEX-113IC 0 1 1 1
## 18 GTEX-113JC 1 0 1 1
## 19 GTEX-117XS 0 0 1 2
## chr8_74853712 chr8_75672651 chr8_121627730 chr10_29878825 chr16_83029235
## 10 1 1 1 1 0
## 11 0 1 0 1 0
## 12 0 2 1 1 0
## 13 1 0 1 1 0
## 14 0 2 0 1 1
## 15 0 2 0 1 0
## 16 0 2 1 2 0
## 17 0 1 1 2 0
## 18 0 2 1 2 0
## 19 0 2 1 0 0
cibersort_results[1:5, 1:15]
## Mixture AGE Mid.age exact_age SEX DTHHRDY VSMC_II VSMC_I
## 1 GTEX-111YS 60-69 65 62 1 0 0.4174776 0.3876466
## 2 GTEX-1122O 60-69 65 64 2 0 0.5353880 0.3373091
## 3 GTEX-1128S 60-69 65 66 2 2 0.5548107 0.2345628
## 4 GTEX-117XS 60-69 65 64 1 2 0.5014955 0.3923229
## 5 GTEX-117YW 50-59 55 58 1 3 0.6688973 0.1841796
## Endothelial_I Fibroblast_I Endothelial_II Pericyte X_ Macrophage
## 1 0 0 0.1862120 0 0 0.008663851
## 2 0 0 0.1273029 0 0 0.000000000
## 3 0 0 0.1996633 0 0 0.010963268
## 4 0 0 0.1036002 0 0 0.002581379
## 5 0 0 0.1469231 0 0 0.000000000
## Lymphatic_Endothelial
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
##now add the cibersortx results in this sheet
combined_results <- merge(cibersort_results, genotype_gtex, by = "Mixture")
#making data in long format
library(tidyr)
#combined_results
#long_combined_results <- gather(combined_results, key = "Celltype", value = "composition", VSMC_I, VSMC_II, Pericyte, Neuron, Mesothelial, Macrophage, Lymphocyte, Lymphatic_Endothelial, Fibroblast_II, Fibroblast_I, Endothelial_I, Endothelial_II, X_)
#long_combined_results <- gather(combined_results, key = "leadsnps", value = "genotype", chr3_41850431, chr5_96269410, chr5_123214952, chr7_74016839, chr8_74868420, chr8_75672651, chr8_121627786, chr8_75672651, chr8_121627786, chr8_123594919, chr10_29794728, chr16_83029235 )
#long_combined_results
##for just one sample - t test ##not for me as t-test does the comparison of mean in two normally distributed groups
loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]
res <- unlist(lapply(loci, function(x){
t.test(combined_results$Macrophage[combined_results[[x]] == 2], combined_results$Macrophage[combined_results[[x]] != 2])$p.value}))
#anova = aov(combined_results$Macrophage ~ factor(combined_results$chr10_29794728))
#summary(anova)
#TukeyHSD(anova) # not useful
names(res) <- loci
res[order(res)]
## chr8_75672651 chr7_74016839 chr16_83029235 chr5_123214952 chr8_121627730
## 0.0004841716 0.4028182218 0.4225738593 0.4447695042 0.4505115584
## chr5_123425370 chr5_96269410 chr10_29878825 chr8_74853712
## 0.5555902626 0.5974868835 0.9521984722 0.9575837229
head(res[order(res)])
## chr8_75672651 chr7_74016839 chr16_83029235 chr5_123214952 chr8_121627730
## 0.0004841716 0.4028182218 0.4225738593 0.4447695042 0.4505115584
## chr5_123425370
## 0.5555902626
significant_loci <- names(res[res <= 0.05])
significant_results <- res[res <= 0.05]
# Display significant loci and their corresponding p-values
data.frame(Locus = significant_loci, P_Value = significant_results)
## Locus P_Value
## chr8_75672651 chr8_75672651 0.0004841716
##for all together – t-test (cases -> 2 and not 2)
variables_of_interest <- c("VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
"Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
"Endothelial_I", "Endothelial_II", "X_")
loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]
# Create an empty list to store results for each variable
results_list <- list()
# Iterate through each variable
for (variable in variables_of_interest) {
# Perform t-test for the current variable
res <- unlist(lapply(loci, function(x) {
ttest_result <- t.test(
combined_results[[variable]][combined_results[[x]] == 2],
combined_results[[variable]][combined_results[[x]] != 2],
na.action = na.omit # Handle missing values
)
p_value <- ttest_result$p.value
return(p_value)
}))
# Store the results in the list with the variable name as the key
names(res) <- loci
results_list[[variable]] <- res
}
# Display significant results for each variable
all_results <- data.frame()
for (variable in variables_of_interest) {
significant_loci <- names(results_list[[variable]][results_list[[variable]] <= 0.05])
significant_results <- results_list[[variable]][results_list[[variable]] <= 0.05]
# Create a data frame for the current variable
variable_results <- data.frame(
Variable = rep(variable, length(significant_loci)),
Locus = significant_loci,
P_Value = significant_results
)
# Append the data frame to the overall results data frame
all_results <- rbind(all_results, variable_results)
}
# Print the final data frame with all significant results
t_results <- print(all_results[complete.cases(all_results), ])
## Variable Locus P_Value
## chr8_121627730 VSMC_II chr8_121627730 0.0495830041
## chr16_83029235 Pericyte chr16_83029235 0.0395263077
## chr8_74853712 Mesothelial chr8_74853712 0.0014798323
## chr8_75672651 Macrophage chr8_75672651 0.0004841716
## chr5_123425370 Fibroblast_I chr5_123425370 0.0015940147
## chr8_1216277301 Fibroblast_I chr8_121627730 0.0027957622
## chr5_96269410 Endothelial_I chr5_96269410 0.0303625695
## chr5_123214952 Endothelial_I chr5_123214952 0.0358451216
## chr5_1234253701 Endothelial_I chr5_123425370 0.0231940811
#write.csv(results, file = "significant_loci_cellular_level.csv")
#results
t_bonf <- p.adjust(t_results$P_Value, method = "bonferroni")
t_fdr <- p.adjust(t_results$P_Value, method = "fdr")
t_results$bonferroni <- t_bonf
t_results$fdr <- t_fdr
t_results
## Variable Locus P_Value bonferroni
## chr8_121627730 VSMC_II chr8_121627730 0.0495830041 0.446247036
## chr16_83029235 Pericyte chr16_83029235 0.0395263077 0.355736770
## chr8_74853712 Mesothelial chr8_74853712 0.0014798323 0.013318491
## chr8_75672651 Macrophage chr8_75672651 0.0004841716 0.004357545
## chr5_123425370 Fibroblast_I chr5_123425370 0.0015940147 0.014346132
## chr8_1216277301 Fibroblast_I chr8_121627730 0.0027957622 0.025161860
## chr5_96269410 Endothelial_I chr5_96269410 0.0303625695 0.273263126
## chr5_123214952 Endothelial_I chr5_123214952 0.0358451216 0.322606094
## chr5_1234253701 Endothelial_I chr5_123425370 0.0231940811 0.208746730
## fdr
## chr8_121627730 0.049583004
## chr16_83029235 0.044467096
## chr8_74853712 0.004782044
## chr8_75672651 0.004357545
## chr5_123425370 0.004782044
## chr8_1216277301 0.006290465
## chr5_96269410 0.044467096
## chr5_123214952 0.044467096
## chr5_1234253701 0.041749346
t test (o and not 0)
variables_of_interest <- c("VSMC_I", "VSMC_II", "Pericyte", "Neuron", "Mesothelial", "Macrophage",
"Lymphocyte", "Lymphatic_Endothelial", "Fibroblast_II", "Fibroblast_I",
"Endothelial_I", "Endothelial_II", "X_")
loci <- colnames(combined_results)[grep("chr", colnames(combined_results))]
# Create an empty list to store results for each variable
results_list <- list()
# Iterate through each variable
for (variable in variables_of_interest) {
# Perform t-test for the current variable
res <- unlist(lapply(loci, function(x) {
ttest_result <- t.test(
combined_results[[variable]][combined_results[[x]] == 0],
combined_results[[variable]][combined_results[[x]] != 0],
na.action = na.omit # Handle missing values
)
p_value <- ttest_result$p.value
return(p_value)
}))
# Store the results in the list with the variable name as the key
names(res) <- loci
results_list[[variable]] <- res
}
# Display significant results for each variable
all_results <- data.frame()
for (variable in variables_of_interest) {
significant_loci <- names(results_list[[variable]][results_list[[variable]] <= 0.05])
significant_results <- results_list[[variable]][results_list[[variable]] <= 0.05]
# Create a data frame for the current variable
variable_results <- data.frame(
Variable = rep(variable, length(significant_loci)),
Locus = significant_loci,
P_Value = significant_results
)
# Append the data frame to the overall results data frame
all_results <- rbind(all_results, variable_results)
}
# Print the final data frame with all significant results
t_results_0 <- print(all_results[complete.cases(all_results), ])
## Variable Locus P_Value
## chr16_83029235 VSMC_II chr16_83029235 0.034561848
## chr5_96269410 Pericyte chr5_96269410 0.046309459
## chr16_830292351 Pericyte chr16_83029235 0.015583055
## chr8_75672651 Mesothelial chr8_75672651 0.001468874
## chr10_29878825 Mesothelial chr10_29878825 0.010886859
## chr7_74016839 Fibroblast_I chr7_74016839 0.016523273
## chr8_756726511 Fibroblast_I chr8_75672651 0.005104568
## chr10_298788251 Fibroblast_I chr10_29878825 0.001585158
## chr5_962694101 Endothelial_II chr5_96269410 0.016460684
#write.csv(results, file = "significant_loci_cellular_level.csv")
#results
t_bonf <- p.adjust(t_results_0$P_Value, method = "bonferroni")
t_fdr <- p.adjust(t_results_0$P_Value, method = "fdr")
t_results_0$bonferroni <- t_bonf
t_results_0$fdr <- t_fdr
t_results_0
## Variable Locus P_Value bonferroni
## chr16_83029235 VSMC_II chr16_83029235 0.034561848 0.31105663
## chr5_96269410 Pericyte chr5_96269410 0.046309459 0.41678513
## chr16_830292351 Pericyte chr16_83029235 0.015583055 0.14024749
## chr8_75672651 Mesothelial chr8_75672651 0.001468874 0.01321986
## chr10_29878825 Mesothelial chr10_29878825 0.010886859 0.09798173
## chr7_74016839 Fibroblast_I chr7_74016839 0.016523273 0.14870946
## chr8_756726511 Fibroblast_I chr8_75672651 0.005104568 0.04594112
## chr10_298788251 Fibroblast_I chr10_29878825 0.001585158 0.01426642
## chr5_962694101 Endothelial_II chr5_96269410 0.016460684 0.14814616
## fdr
## chr16_83029235 0.038882079
## chr5_96269410 0.046309459
## chr16_830292351 0.021244208
## chr8_75672651 0.007133211
## chr10_29878825 0.021244208
## chr7_74016839 0.021244208
## chr8_756726511 0.015313705
## chr10_298788251 0.007133211
## chr5_962694101 0.021244208
##LOOKS SUSPICIOUS!!! ##TRYING LINEAR MODELLING NOW
library(RegParallel)
## Loading required package: doParallel
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
## Loading required package: data.table
## Loading required package: stringr
## Loading required package: survival
## Loading required package: arm
## Loading required package: MASS
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: lme4
##
## arm (Version 1.13-1, built: 2022-8-25)
## Working directory is /home/mchopra/Documents/PhD-Year1/GTEx
#celltypes of interest - VSMC_I, VSMC_II, Endothelial_I, Endothelial_II, Fibroblast_I, Fibroblast_II, Pericyte, Macrophage, Mesothelial only. Not including Lymphatic_Endothelial, Lymphocyte and Neuron as i got no cellular proportion of them in the GTEx samples.
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")
#fit <- lm(VSMC_II ~ as.numeric(chr3_41850431), data=combined_results)
#summary(fit)
#as.numeric because of additive genetic model
#fc = as.factor(combined_results$chr3_41850431)
#ggpubr::compare_means(VSMC_II ~ chr3_41850431, data=combined_results, ref.group = "0", method = "t.test")
#anova = aov(combined_results$VSMC_II ~ as.factor(combined_results$chr3_41850431))
#TukeyHSD(anova)
macro_res1 <- RegParallel(
data = combined_results,
formula = 'VSMC_II ~ as.numeric([*])',
FUN = function(formula, data)
lm(formula = formula,
data = data),
FUNtype = 'lm',
variables = colnames(combined_results)[23:31],
blocksize = 9,
p.adjust = "BH",
cores = 2,
nestedParallel = FALSE,
conflevel = 95)
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ as.numeric(chr5_96269410)
## -- VSMC_II ~ as.numeric(chr5_123214952)
## -- VSMC_II ~ as.numeric(chr5_123425370)
## -- VSMC_II ~ as.numeric(chr7_74016839)
## -- VSMC_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
# Define the response variables
celltypes <- c("VSMC_I", "VSMC_II", "Pericyte", "Mesothelial", "Macrophage", "Fibroblast_II", "Fibroblast_I", "Endothelial_I", "Endothelial_II")
# Initialize a list to store the model results
macro_res_list <- list()
# Use a for loop to fit linear regression models for each response variable
for (ctypes in celltypes) {
formula <- paste(ctypes, '~ as.numeric([*])')
# Note: Replace 'as.numeric([*])' with the actual predictor variable
# you want to use. I've used a placeholder here.
macro_res <- RegParallel(
data = combined_results,
formula = formula,
FUN = function(formula, data)
lm(formula = formula, data = data),
FUNtype = 'lm',
variables = colnames(combined_results)[23:31],
blocksize = 9,
p.adjust = "BH",
cores = 2,
nestedParallel = FALSE,
conflevel = 95
)
# Store the results in the list
macro_res_list[[ctypes]] <- macro_res
}
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_I
## First 5 formulae:
## -- VSMC_I ~ as.numeric(chr5_96269410)
## -- VSMC_I ~ as.numeric(chr5_123214952)
## -- VSMC_I ~ as.numeric(chr5_123425370)
## -- VSMC_I ~ as.numeric(chr7_74016839)
## -- VSMC_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- VSMC_II
## First 5 formulae:
## -- VSMC_II ~ as.numeric(chr5_96269410)
## -- VSMC_II ~ as.numeric(chr5_123214952)
## -- VSMC_II ~ as.numeric(chr5_123425370)
## -- VSMC_II ~ as.numeric(chr7_74016839)
## -- VSMC_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Pericyte
## First 5 formulae:
## -- Pericyte ~ as.numeric(chr5_96269410)
## -- Pericyte ~ as.numeric(chr5_123214952)
## -- Pericyte ~ as.numeric(chr5_123425370)
## -- Pericyte ~ as.numeric(chr7_74016839)
## -- Pericyte ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Mesothelial
## First 5 formulae:
## -- Mesothelial ~ as.numeric(chr5_96269410)
## -- Mesothelial ~ as.numeric(chr5_123214952)
## -- Mesothelial ~ as.numeric(chr5_123425370)
## -- Mesothelial ~ as.numeric(chr7_74016839)
## -- Mesothelial ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Macrophage
## First 5 formulae:
## -- Macrophage ~ as.numeric(chr5_96269410)
## -- Macrophage ~ as.numeric(chr5_123214952)
## -- Macrophage ~ as.numeric(chr5_123425370)
## -- Macrophage ~ as.numeric(chr7_74016839)
## -- Macrophage ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_II
## First 5 formulae:
## -- Fibroblast_II ~ as.numeric(chr5_96269410)
## -- Fibroblast_II ~ as.numeric(chr5_123214952)
## -- Fibroblast_II ~ as.numeric(chr5_123425370)
## -- Fibroblast_II ~ as.numeric(chr7_74016839)
## -- Fibroblast_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Fibroblast_I
## First 5 formulae:
## -- Fibroblast_I ~ as.numeric(chr5_96269410)
## -- Fibroblast_I ~ as.numeric(chr5_123214952)
## -- Fibroblast_I ~ as.numeric(chr5_123425370)
## -- Fibroblast_I ~ as.numeric(chr7_74016839)
## -- Fibroblast_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_I
## First 5 formulae:
## -- Endothelial_I ~ as.numeric(chr5_96269410)
## -- Endothelial_I ~ as.numeric(chr5_123214952)
## -- Endothelial_I ~ as.numeric(chr5_123425370)
## -- Endothelial_I ~ as.numeric(chr7_74016839)
## -- Endothelial_I ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
##
## ##############################
## #RegParallel
## ##############################
## System is:
## -- Linux
## Blocksize:
## -- 9
## Cores / Threads:
## -- 2
## Terms included in model:
## -- Endothelial_II
## First 5 formulae:
## -- Endothelial_II ~ as.numeric(chr5_96269410)
## -- Endothelial_II ~ as.numeric(chr5_123214952)
## -- Endothelial_II ~ as.numeric(chr5_123425370)
## -- Endothelial_II ~ as.numeric(chr7_74016839)
## -- Endothelial_II ~ as.numeric(chr8_74853712)
## Processing 9 formulae, batch 1 of 1
## -- index1: 1; index2: 9
## Done!
lm_results <- dplyr::bind_rows(macro_res_list, .id ="celltypes")
##checking using plots!!
lm and glm(for non-normal distribution data) does give the same results ##not anything significant
library(NatParksPalettes)
library(ggplot2)
fit <- lm((Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
summary(fit)
##
## Call:
## lm(formula = (Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.007419 -0.000434 -0.000434 -0.000434 0.119312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0004344 0.0004504 0.964 0.33550
## as.numeric(chr8_74853712) 0.0034924 0.0010826 3.226 0.00136 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.008245 on 385 degrees of freedom
## Multiple R-squared: 0.02632, Adjusted R-squared: 0.02379
## F-statistic: 10.41 on 1 and 385 DF, p-value: 0.001362
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_I, group = chr8_74853712, color = chr8_74853712)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit)[2], intercept = coef(fit)[1], color = "purple") +
theme_classic()
fit2 = lm(Mesothelial ~ as.numeric(chr8_121627730), data=combined_results)
summary(fit2)
##
## Call:
## lm(formula = Mesothelial ~ as.numeric(chr8_121627730), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.002222 -0.001199 -0.000176 -0.000176 0.062672
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0001756 0.0003206 0.548 0.5842
## as.numeric(chr8_121627730) 0.0010233 0.0003875 2.641 0.0086 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004602 on 385 degrees of freedom
## Multiple R-squared: 0.01779, Adjusted R-squared: 0.01524
## F-statistic: 6.975 on 1 and 385 DF, p-value: 0.008603
ggplot(data = combined_results, aes(x = chr8_121627730, y = Mesothelial, group = chr8_121627730)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit2)[2], intercept = coef(fit2)[1], color = "purple") +
theme_classic()
fit3 = lm(Fibroblast_I ~ as.numeric(chr7_74016839), data=combined_results)
summary(fit3)
##
## Call:
## lm(formula = Fibroblast_I ~ as.numeric(chr7_74016839), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.003912 -0.001940 -0.001940 0.000032 0.130288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.201e-05 9.214e-04 -0.035 0.9723
## as.numeric(chr7_74016839) 1.972e-03 7.980e-04 2.471 0.0139 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01096 on 385 degrees of freedom
## Multiple R-squared: 0.01562, Adjusted R-squared: 0.01306
## F-statistic: 6.108 on 1 and 385 DF, p-value: 0.01389
ggplot(data = combined_results, aes(x = chr7_74016839, y = Fibroblast_I, group = chr7_74016839)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit3)[2], intercept = coef(fit3)[1], color = "purple") +
theme_classic()
fit4 = lm(Endothelial_II ~ as.numeric(chr8_74853712) + exact_age + SEX, data=combined_results)
summary(fit4)
##
## Call:
## lm(formula = Endothelial_II ~ as.numeric(chr8_74853712) + exact_age +
## SEX, data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16743 -0.05450 -0.01126 0.04884 0.27393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2370118 0.0200085 11.846 <2e-16 ***
## as.numeric(chr8_74853712) 0.0246095 0.0100597 2.446 0.0149 *
## exact_age -0.0001514 0.0003037 -0.498 0.6184
## SEX -0.0036256 0.0081351 -0.446 0.6561
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07655 on 383 degrees of freedom
## Multiple R-squared: 0.01617, Adjusted R-squared: 0.008463
## F-statistic: 2.098 on 3 and 383 DF, p-value: 0.09999
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_II, group = chr8_74853712, color = chr8_74853712, fill = chr8_74853712)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit4)[2], intercept = coef(fit4)[1], color = "purple") +
theme_classic() +
scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
labs(y= "Endothelial_II", x = "PI15/RP11-6I2.3") + guides(color=FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
fit5 = lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age + SEX , data=combined_results)
summary(fit5)
##
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age +
## SEX, data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29131 -0.06599 0.00219 0.07719 0.35332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3568465 0.0317453 11.241 <2e-16 ***
## as.numeric(chr10_29878825) -0.0193452 0.0084722 -2.283 0.0230 *
## exact_age -0.0008967 0.0004441 -2.019 0.0441 *
## SEX -0.0165990 0.0119005 -1.395 0.1639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1119 on 383 degrees of freedom
## Multiple R-squared: 0.02625, Adjusted R-squared: 0.01862
## F-statistic: 3.442 on 3 and 383 DF, p-value: 0.01692
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, color = chr10_29878825, fill = chr10_29878825)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
theme_classic() +
scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
labs(y= "Vascular Smooth Muscle Cells I", x = "SVIL genotype") + guides(color=FALSE)
fit6 = lm(Pericyte ~ as.numeric(chr5_96269410), data=combined_results)
summary(fit6)
##
## Call:
## lm(formula = Pericyte ~ as.numeric(chr5_96269410), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.002828 -0.002828 -0.001639 -0.001639 0.085631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0028283 0.0005960 4.746 2.94e-06 ***
## as.numeric(chr5_96269410) -0.0011897 0.0006087 -1.954 0.0514 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00844 on 385 degrees of freedom
## Multiple R-squared: 0.009824, Adjusted R-squared: 0.007252
## F-statistic: 3.82 on 1 and 385 DF, p-value: 0.05138
ggplot(data = combined_results, aes(x = chr5_96269410, y = Pericyte, group = chr5_96269410)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit6)[2], intercept = coef(fit6)[1], color = "purple") +
theme_classic()
fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age + SEX , data=combined_results)
summary(fit7)
##
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age +
## SEX, data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.239136 -0.052667 0.002132 0.054667 0.306668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3716536 0.0229092 16.223 < 2e-16 ***
## as.numeric(chr10_29878825) 0.0134282 0.0061140 2.196 0.028670 *
## exact_age 0.0012160 0.0003205 3.794 0.000172 ***
## SEX 0.0124025 0.0085881 1.444 0.149515
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08073 on 383 degrees of freedom
## Multiple R-squared: 0.04938, Adjusted R-squared: 0.04193
## F-statistic: 6.631 on 3 and 383 DF, p-value: 0.0002238
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, color = chr10_29878825, fill = chr10_29878825)) +
geom_jitter() +
geom_boxplot() +
geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple", size = 1) +
theme_classic() +
scale_fill_manual(values = natparks.pals("Charmonix", 3)) +
labs(y= "Vascular Smooth Muscle Cells II", x = "SVIL genotype") +
guides(fill = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#p <- grid.arrange(b, c, a, ncol = 3, widths = c(6,6,6))
#p
#ggsave("/home/mchopra/Documents/PhD-Year1/GWAS/genotype_cellularcomposition.png", p, width = 20, height = 6, dpi = 800)
fit4 = lm(Endothelial_II ~ as.numeric(chr8_74853712), data=combined_results)
summary(fit4)
##
## Call:
## lm(formula = Endothelial_II ~ as.numeric(chr8_74853712), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16801 -0.05718 -0.01184 0.04786 0.26897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.224279 0.004174 53.735 <2e-16 ***
## as.numeric(chr8_74853712) 0.024345 0.010031 2.427 0.0157 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0764 on 385 degrees of freedom
## Multiple R-squared: 0.01507, Adjusted R-squared: 0.01251
## F-statistic: 5.89 on 1 and 385 DF, p-value: 0.01568
ggplot(data = combined_results, aes(x = chr8_74853712, y = Endothelial_II, group = chr8_74853712, fill = chr8_74853712)) +
geom_boxplot(outlier.shape = NA) +
geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
geom_abline(slope = coef(fit4)[2], intercept = coef(fit4)[1], color = "purple") +
theme_minimal() +
labs(y= "Endothelial_II", x = "PI15/RP11-6I2.3", ylim(0,0.8)) +
guides(fill=FALSE) +
coord_cartesian(ylim = c(0,0.8))
######
fit5 = lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit5)
##
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age,
## data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28471 -0.06292 0.00091 0.07966 0.35906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3320838 0.0263492 12.603 <2e-16 ***
## as.numeric(chr10_29878825) -0.0187054 0.0084702 -2.208 0.0278 *
## exact_age -0.0008688 0.0004442 -1.956 0.0512 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.112 on 384 degrees of freedom
## Multiple R-squared: 0.02131, Adjusted R-squared: 0.01621
## F-statistic: 4.18 on 2 and 384 DF, p-value: 0.01601
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, fill = chr10_29878825)) +
geom_boxplot(outlier.shape = NA) +
geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
theme_minimal() +
labs(y= "Vascular Smooth Muscle Cells I", x = "SVIL") +
guides(fill=FALSE) +
coord_cartesian(ylim = c(0,0.8)) +
theme(legend.text = element_text(size = 20))
#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc1VSgenotype.png", y)
######
fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit7)
##
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age,
## data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.240488 -0.051188 0.000293 0.054271 0.314618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3901558 0.0190186 20.514 < 2e-16 ***
## as.numeric(chr10_29878825) 0.0129501 0.0061137 2.118 0.034798 *
## exact_age 0.0011951 0.0003206 3.728 0.000222 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08085 on 384 degrees of freedom
## Multiple R-squared: 0.0442, Adjusted R-squared: 0.03922
## F-statistic: 8.879 on 2 and 384 DF, p-value: 0.0001699
ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, fill = chr10_29878825)) +
geom_boxplot(outlier.shape = NA) +
geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
# geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple", size = 1) +
theme_minimal() +
labs(y= "Vascular Smooth Muscle Cells II", x = "SVIL") +
guides(fill = FALSE) +
coord_cartesian(ylim = c(0,0.8)) +
theme(legend.text = element_text(size = 20))
####
library(gridExtra)
#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc2VSgenotype.png", z)
#plot <- grid.arrange(y, z, ncol = 2, widths = c(8,8))
#ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/genotype_cellularcomposition_final.png", plot, width = 14, height = 8, dpi = 800)
fit5 <- lm(VSMC_I ~ as.numeric(chr10_29878825) + exact_age, data = combined_results)
summary(fit5)
##
## Call:
## lm(formula = VSMC_I ~ as.numeric(chr10_29878825) + exact_age,
## data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28471 -0.06292 0.00091 0.07966 0.35906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3320838 0.0263492 12.603 <2e-16 ***
## as.numeric(chr10_29878825) -0.0187054 0.0084702 -2.208 0.0278 *
## exact_age -0.0008688 0.0004442 -1.956 0.0512 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.112 on 384 degrees of freedom
## Multiple R-squared: 0.02131, Adjusted R-squared: 0.01621
## F-statistic: 4.18 on 2 and 384 DF, p-value: 0.01601
y <- ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_I, group = chr10_29878825, fill = chr10_29878825)) +
geom_boxplot(outlier.shape = NA) +
geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
# geom_abline(slope = coef(fit5)[2], intercept = coef(fit5)[1], color = "purple") +
geom_hline(yintercept = median(combined_results$VSMC_I), linetype = "dashed", color = "blue") +
theme_minimal() +
labs(y = "Vascular Smooth Muscle Cells I", x = "SVIL", size = 20) +
guides(fill = FALSE) +
coord_cartesian(ylim = c(0, 0.8)) +
theme(legend.text = element_text(size = 20))
y
fit7 = lm(VSMC_II ~ as.numeric(chr10_29878825) + exact_age, data=combined_results)
summary(fit7)
##
## Call:
## lm(formula = VSMC_II ~ as.numeric(chr10_29878825) + exact_age,
## data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.240488 -0.051188 0.000293 0.054271 0.314618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3901558 0.0190186 20.514 < 2e-16 ***
## as.numeric(chr10_29878825) 0.0129501 0.0061137 2.118 0.034798 *
## exact_age 0.0011951 0.0003206 3.728 0.000222 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08085 on 384 degrees of freedom
## Multiple R-squared: 0.0442, Adjusted R-squared: 0.03922
## F-statistic: 8.879 on 2 and 384 DF, p-value: 0.0001699
Z <- ggplot(data = combined_results, aes(x = chr10_29878825, y = VSMC_II, group = chr10_29878825, fill = chr10_29878825)) +
geom_boxplot(outlier.shape = NA) +
geom_point(position = position_jitterdodge(), color = "chocolate1", alpha = 0.5) +
# geom_abline(slope = coef(fit7)[2], intercept = coef(fit7)[1], color = "purple") +
geom_hline(yintercept = median(combined_results$VSMC_II), linetype = "dashed", color = "blue") +
theme_minimal() +
labs(y = "Vascular Smooth Muscle Cells II", x = "SVIL", size = 20) +
guides(fill = FALSE) +
coord_cartesian(ylim = c(0, 0.8)) +
theme(legend.text = element_text(size = 20))
Z
#plot <- grid.arrange(y, Z, ncol = 2, widths = c(8,8))
ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc1vsSVIL.png", y , width = 5, height = 4, dpi = 800)
ggsave("/home/mchopra/Documents/PhD-Year1/GTEx/vsmc2vsSVIL.png", Z , width = 5, height = 4, dpi = 800)
##WORTH NOTICING Endothelial_II, VSMC_II
##maybe and maybe not useful!!
# Load the necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(broom)
# Your chromosome data
chromosomes <- c("chr3_41850431", "chr5_96269410", "chr5_123214952", "chr7_74016839", "chr8_74868420", "chr8_75672651", "chr8_121627786", "chr8_123594919", "chr10_29794728", "chr16_83029235")
#colnames(results_table)[1] <- "leadsnps"
#a <- merge(long_combined_results, results_table, all.y = TRUE, by = "leadsnps")
#rename = rep(c("Intercept", "Slope"), c(2), length.out=7740)
#a$term = rename
#library(lme4)
#fit = lm(Macrophage ~ as.numeric(chr3_41850431) + as.numeric(chr5_96269410) + as.numeric(chr5_123214952) + as.numeric(chr7_74016839) + as.numeric(chr8_74868420) + as.numeric(chr8_75672651) + as.numeric(chr8_121627786) + as.numeric(chr8_123594919) + as.numeric(chr10_29794728) + as.numeric(chr16_83029235), data=combined_results)
summary(fit)
##
## Call:
## lm(formula = (Endothelial_I) ~ as.numeric(chr8_74853712), data = combined_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.007419 -0.000434 -0.000434 -0.000434 0.119312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0004344 0.0004504 0.964 0.33550
## as.numeric(chr8_74853712) 0.0034924 0.0010826 3.226 0.00136 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.008245 on 385 degrees of freedom
## Multiple R-squared: 0.02632, Adjusted R-squared: 0.02379
## F-statistic: 10.41 on 1 and 385 DF, p-value: 0.001362
fit$coefficients
## (Intercept) as.numeric(chr8_74853712)
## 0.0004343582 0.0034923832
# this line returns predictions by the model (on the same data) - controls for the effect of Sex
#combined_results$predict_Macrophage = predict(fit, newdata = combined_results)
library(ggpubr)
# this is a simpler plot, it plots AGE and VSMC_I and comutes the correlation R + pvalue
# but it does not control for sex - we are plotting raw data
#ggplot(data = a, aes(x = genotype, y = Macrophage, group_by(leadsnps), color = genotype)) +
#facet_wrap(~leadsnps, nrow=2) +
# geom_jitter() +
#geom_boxplot() +
#geom_smooth(method = "lm") +
# geom_abline(slope = coef(fit)[2], intercept = coef(fit)[1], color = "red") +
#theme_classic()