Co-expression analysis in Aging Brain

Hypothesis:

We’ll be dealing with following Hypothesises

  • Hypothesis Q1: Does expression of Epigenetics regulators change with age?

  • Hypothesis Q2: Are House keeping genes and Epigenetics regulators co-related?

Loading Libraries

library(ggplot2)
library(tidyverse)
library(PCAtools)
library(dplyr)
library(biomaRt)

Setting working directory

setwd("X:/AGING/J Pedro Project/Project Files/age_brain_tsv_files")

Loading Brain tissue read count Files

age20_29 <- read.csv("brain_tg_20-29.csv.tsv", header = T, sep = "\t")
age30_39 <- read.csv("brain_tg_30-39.csv.tsv", header = T, sep = "\t")
age40_49 <- read.csv("brain_tg_40-49.csv.tsv", header = T, sep = "\t")
age50_59 <- read.csv("brain_tg_50-59.csv.tsv", header = T, sep = "\t")
age60_69 <- read.csv("brain_tg_60-69.csv.tsv", header = T, sep = "\t")
age70_79 <- read.csv("brain_tg_70-79.csv.tsv", header = T, sep = "\t")

Loading housekeeping genes

Housekeeping <- read.delim2("X:/AGING/J Pedro Project/Project Files/Housekeeping genes/degannotation-e.dat", header=F)
names(Housekeeping) <- Housekeeping[1,]
Housekeeping <- Housekeeping[-1,]

# Filtering only human genes 
homohouse <- Housekeeping[Housekeeping$`#Refseq`=="Homo sapiens",]
homogenes <- as.data.frame(homohouse$`#Gene_Ref`)
homogenes <- unique(homogenes)
colnames(homogenes) <- c("HGNC_Symbol")

# Converting ensemble ids to hgnc_symbol ids 

mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

braingeneid <- getBM(attributes = c("ensembl_gene_id","hgnc_symbol"),
                 filters = "ensembl_gene_id", values = age20_29[,1],
                 mart = mart)

Now taking common genes in Brain and Housekeeping gene set (braingeneid & homohouse)

common <- merge(braingeneid, homogenes, by.x = "hgnc_symbol", by.y = "HGNC_Symbol", all.x = F, all.y = T)
common <- na.omit(common)

Filtering out Housekeeping genes in all agegroup brain dataset

age20_29 <- merge(common, age20_29, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age30_39 <- merge(common, age30_39, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age40_49 <- merge(common, age40_49, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age50_59 <- merge(common, age50_59, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age60_69 <- merge(common, age60_69, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)
age70_79 <- merge(common, age70_79, by.x = "ensembl_gene_id", by.y = "gene_id", all.x = T, all.y = F)

Filtering out DNMT1 ENSG00000130816

Dnmt1a<- age20_29[age20_29$gene_id=="ENSG00000130816",]
Dnmt1b<- age30_39[age30_39$gene_id=="ENSG00000130816",]
Dnmt1c<- age40_49[age40_49$gene_id=="ENSG00000130816",]
Dnmt1d<- age50_59[age50_59$gene_id=="ENSG00000130816",]
Dnmt1e<- age60_69[age60_69$gene_id=="ENSG00000130816",]
Dnmt1f<- age70_79[age70_79$gene_id=="ENSG00000130816",]

Refining data

Transposing datasets

# Making copies of original dataset so we dont mess up the original one
tage20_29 = age20_29
tage30_39 = age30_39
tage40_49 = age40_49
tage50_59 = age50_59
tage60_69 = age60_69
tage70_79 = age70_79

# Transposing the datasets
rownames(tage20_29) <- tage20_29[,1]
tage20_29 <- tage20_29 [, -(1:2)]
tage20_29 <- as.data.frame(t(tage20_29))

rownames(tage30_39) <- tage30_39[,1]
tage30_39 <- tage30_39 [, -(1:2)]
tage30_39 <- as.data.frame(t(tage30_39))

rownames(tage40_49) <- tage40_49[,1]
tage40_49 <- tage40_49 [, -(1:2)]
tage40_49 <- as.data.frame(t(tage40_49))

rownames(tage50_59) <- tage50_59[,1]
tage50_59 <- tage50_59 [, -(1:2)]
tage50_59 <- as.data.frame(t(tage50_59))

rownames(tage60_69) <- tage60_69[,1]
tage60_69 <- tage60_69 [, -(1:2)]
tage60_69 <- as.data.frame(t(tage60_69))

rownames(tage70_79) <- tage70_79[,1]
tage70_79 <- tage70_79 [, -(1:2)]
tage70_79 <- as.data.frame(t(tage70_79))

Saving P values and cor values

AGE 20_29

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage20_29[,colnames(tage20_29)=="ENSG00000130816"], tage20_29[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_20_29 <- data.frame(Gene_ID = colnames(tage20_29),
                       Gene_Name= age20_29[,2],
                       cor_value_20_29 = cor_e,
                       p_value_20_29  = cor_p)

AGE 30_39

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage30_39[,colnames(tage30_39)=="ENSG00000130816"], tage30_39[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_30_39 <- data.frame(Gene_ID = colnames(tage30_39),
                       Gene_Name= age30_39[,2],
                       cor_value_30_39 = cor_e,
                       p_value_30_39  = cor_p)

AGE 40_49

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage40_49[,colnames(tage40_49)=="ENSG00000130816"], tage40_49[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_40_49 <- data.frame(Gene_ID = colnames(tage40_49),
                       Gene_Name= age40_49[,2],
                       cor_value_40_49 = cor_e,
                       p_value_40_49  = cor_p)

AGE 50_59

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage50_59[,colnames(tage50_59)=="ENSG00000130816"], tage50_59[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_50_59 <- data.frame(Gene_ID = colnames(tage50_59),
                       Gene_Name= age50_59[,2],
                       cor_value_50_59 = cor_e,
                       p_value_50_59  = cor_p)

AGE 60_69

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage60_69[,colnames(tage60_69)=="ENSG00000130816"], tage60_69[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_60_69 <- data.frame(Gene_ID = colnames(tage60_69),
                       Gene_Name= age60_69[,2],
                       cor_value_60_69 = cor_e,
                       p_value_60_69  = cor_p)

AGE 70_79

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage70_79[,colnames(tage70_79)=="ENSG00000130816"], tage70_79[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_70_79 <- data.frame(Gene_ID = colnames(tage70_79),
                       Gene_Name= age70_79[,2],
                       cor_value_70_79 = cor_e,
                       p_value_70_79  = cor_p)

AGE 30_39

i<-c()
cor_e <- c()
cor_p <- c()
for(i in 1:7627)
{
  u = cor.test(tage30_39[,colnames(tage30_39)=="ENSG00000130816"], tage30_39[,i],
               method = "pearson", p.adjust.methods = "fdr", use = 'pairwise.complete.obs', exact = F)
  cor_e[i] = u$estimate
  cor_p[i] = u$p.value
}

cor_test_30_39 <- data.frame(Gene_ID = colnames(tage30_39),
                       Gene_Name= age30_39[,2],
                       cor_value_30_39 = cor_e,
                       p_value_30_39  = cor_p)

Filtering Data

Now we will filter out data for which p value is less then 0.01 and cor value is greater than 0.9

Positively Corelated Genes

poscor20_29 <- cor_test_20_29[cor_test_20_29$cor_value_20_29 > 0.9 & cor_test_20_29$p_value_20_29 < 0.001,] 

poscor30_39 <- cor_test_30_39[cor_test_30_39$cor_value_30_39 > 0.9 & cor_test_30_39$p_value_30_39 < 0.001,]

Negatively Corelated Genes

Merging

commongenes <- merge(poscor20_29, poscor30_39, by.x = "Gene_ID", by.y = "Gene_ID")