We’ll be dealing with following Hypothesises
Hypothesis Q1: Does expression of Epigenetics regulators change with age?
Hypothesis Q2: Are House keeping genes and Epigenetics regulators co-related?
library(ggplot2)
library(tidyverse)
library(PCAtools)
library(dplyr)
library(biomaRt)
setwd("X:/AGING/J Pedro Project/Project Files/age_brain_tsv_files")
age20_29 <- read.csv("brain_tg_20-29.csv.tsv", header = T, sep = "\t")
age30_39 <- read.csv("brain_tg_30-39.csv.tsv", header = T, sep = "\t")
age40_49 <- read.csv("brain_tg_40-49.csv.tsv", header = T, sep = "\t")
age50_59 <- read.csv("brain_tg_50-59.csv.tsv", header = T, sep = "\t")
age60_69 <- read.csv("brain_tg_60-69.csv.tsv", header = T, sep = "\t")
age70_79 <- read.csv("brain_tg_70-79.csv.tsv", header = T, sep = "\t")
Housekeeping <- read.delim2("X:/AGING/J Pedro Project/Project Files/Housekeeping genes/degannotation-e.dat", header=F)
names(Housekeeping) <- Housekeeping[1,]
Housekeeping <- Housekeeping[-1,]
# Filtering only human genes
homohouse <- Housekeeping[Housekeeping$`#Refseq`=="Homo sapiens",]
homogenes <- as.data.frame(homohouse$`#Gene_Ref`)
homogenes <- unique(homogenes)
colnames(homogenes) <- c("HGNC_Symbol")
# Converting ensemble ids to hgnc_symbol ids
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")
braingeneid <- getBM(attributes = c("ensembl_gene_id","hgnc_symbol"),
filters = "ensembl_gene_id", values = age20_29[,1],
mart = mart)
common <- merge(braingeneid, homogenes, by.x = "hgnc_symbol", by.y = "HGNC_Symbol", all.x = F, all.y = T)
common <- na.omit(common)
age20_29 <- merge(common, age20_29, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age30_39 <- merge(common, age30_39, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age40_49 <- merge(common, age40_49, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age50_59 <- merge(common, age50_59, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age60_69 <- merge(common, age60_69, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age70_79 <- merge(common, age70_79, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
Dnmt1a<- age20_29[age20_29$gene_id=="ENSG00000130816",]
Dnmt1b<- age30_39[age30_39$gene_id=="ENSG00000130816",]
Dnmt1c<- age40_49[age40_49$gene_id=="ENSG00000130816",]
Dnmt1d<- age50_59[age50_59$gene_id=="ENSG00000130816",]
Dnmt1e<- age60_69[age60_69$gene_id=="ENSG00000130816",]
Dnmt1f<- age70_79[age70_79$gene_id=="ENSG00000130816",]
Transposing datasets
# Making copies of original dataset so we dont mess up the original one
tage20_29 = age20_29
tage30_39 = age30_39
tage40_49 = age40_49
tage50_59 = age50_59
tage60_69 = age60_69
tage70_79 = age70_79
# Transposing the datasets
rownames(tage20_29) <- tage20_29[,1]
tage20_29 <- tage20_29 [, -(1:2)]
tage20_29 <- as.data.frame(t(tage20_29))
rownames(tage30_39) <- tage30_39[,1]
tage30_39 <- tage30_39 [, -(1:2)]
tage30_39 <- as.data.frame(t(tage30_39))
rownames(tage40_49) <- tage40_49[,1]
tage40_49 <- tage40_49 [, -(1:2)]
tage40_49 <- as.data.frame(t(tage40_49))
rownames(tage50_59) <- tage50_59[,1]
tage50_59 <- tage50_59 [, -(1:2)]
tage50_59 <- as.data.frame(t(tage50_59))
rownames(tage60_69) <- tage60_69[,1]
tage60_69 <- tage60_69 [, -(1:2)]
tage60_69 <- as.data.frame(t(tage60_69))
rownames(tage70_79) <- tage70_79[,1]
tage70_79 <- tage70_79 [, -(1:2)]
tage70_79 <- as.data.frame(t(tage70_79))
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage20_29[,colnames(tage20_29)=="ENSG00000130816"], tage20_29[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_20_29 <- data.frame(Gene_ID = colnames(tage20_29),
Gene_Name= age20_29[,2],
cor_value_20_29 = cor_e,
p_value_20_29 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage30_39[,colnames(tage30_39)=="ENSG00000130816"], tage30_39[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_30_39 <- data.frame(Gene_ID = colnames(tage30_39),
Gene_Name= age30_39[,2],
cor_value_30_39 = cor_e,
p_value_30_39 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage40_49[,colnames(tage40_49)=="ENSG00000130816"], tage40_49[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_40_49 <- data.frame(Gene_ID = colnames(tage40_49),
Gene_Name= age40_49[,2],
cor_value_40_49 = cor_e,
p_value_40_49 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage50_59[,colnames(tage50_59)=="ENSG00000130816"], tage50_59[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_50_59 <- data.frame(Gene_ID = colnames(tage50_59),
Gene_Name= age50_59[,2],
cor_value_50_59 = cor_e,
p_value_50_59 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage60_69[,colnames(tage60_69)=="ENSG00000130816"], tage60_69[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_60_69 <- data.frame(Gene_ID = colnames(tage60_69),
Gene_Name= age60_69[,2],
cor_value_60_69 = cor_e,
p_value_60_69 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage70_79[,colnames(tage70_79)=="ENSG00000130816"], tage70_79[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_70_79 <- data.frame(Gene_ID = colnames(tage70_79),
Gene_Name= age70_79[,2],
cor_value_70_79 = cor_e,
p_value_70_79 = cor_p)
i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
u = cor.test(tage30_39[,colnames(tage30_39)=="ENSG00000130816"], tage30_39[,i],
method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
cor_e[i] = u$estimate
cor_p[i] = u$p.value
}
cor_test_30_39 <- data.frame(Gene_ID = colnames(tage30_39),
Gene_Name= age30_39[,2],
cor_value_30_39 = cor_e,
p_value_30_39 = cor_p)
Now we will filter out data for which p value is less then 0.01 and cor value is greater than 0.9
commongenes <- merge(poscor20_29, poscor30_39, by.x = "Gene_ID", by.y = "Gene_ID")