This documentation provides a detailed description of the functions and workflow used for the analysis of how glycolysis is regulated by shear stress. This analysis is divided into sections according to the type of analysis conducted. Section I explores how glycolysis to flow on the transcript level, section II identifies regulators of glycolysis on the transcriptional level, Section III explores transcription factors that regulate glycolysis, section IV explores the post translational regulation of GCKR, section V investigates the epigenetic regulation of glycolysis in response to shear stress.
In this section, all libraries and assigned reference objects used in the analysis are loaded and the working directory is set (excluded from view). Functions for this analysis can be found at the following github repository: https://github.com/brengong/ConservationtextmineR
library(data.table)
library(pheatmap)
library(ggpubr)
library(ggplot2)
library(RColorBrewer)
library(biomaRt)
library(dplyr)
library(yarrr)
library(systemPipeR)
library(BiocParallel)
library(Biostrings)
library(Rsamtools)
library(GenomicRanges)
library(GenomicAlignments)
library(ShortRead)
library(ape)
library(rtracklayer)
library(ggbio)
library(ChIPpeakAnno)
library(GenomicFeatures)
library(ChIPseeker)
library(seqLogo)
library(BCRANK)
library(Gviz)
library(trackViewer)
library(ConservationtextmineR)
library(MyPackage)
library(tiff)
library(png)
#Organ <- "Homo sapiens"
#BioMartdataset <-"hsapiens_gene_ensembl"
PSOS <- fread("./results/5 PSOS FC data.xls")
PSOS$GeneName <- toupper(PSOS$GeneName)
glycolysis_genes <- c("SLC2A1", "SLC2A2", "SLC2A3", "SLC2A4", "SLC2A5",
"HK1","HK2", "HK3", "GCK", "GK", "GK5", "G6PC", "GPI", "GPI1",
"PFKM", "PFKL", "PFKP", "PFKFB3", "FBP1", "FBP2", "ALDOA",
"ALDOB", "ALDOC", "TPI1", "GAPDH", "GAPDHS", "PGK1", "PGK2",
"PGAM1", "PGAM2", "ENO", "ENO1", "ENO2", "ENO3", "PKM", "PKM2",
"PKLR", "LDHAL6B", "LDHA", "LDHB", "LDHC", "PCK1", "PCK2", "PC", "GK3P")
genes <- glycolysis_genes
interest_genes <- NULL
for(i in 1:length(genes)){
ID <- PSOS[PSOS$GeneName == genes[i],]
interest_genes <- rbind(interest_genes, ID)
}
interest_genes <- interest_genes[!duplicated(interest_genes$GeneName),]
interest_genes[,c(1,2)]
## TranscriptID GeneName
## 1: NM_006516_chr1 SLC2A1
## 2: NM_006931_chr12 SLC2A3
## 3: NM_000188_chr10 HK1
## 4: NM_000189_chr2 HK2
## 5: NM_001128127_chrX GK
## 6: NM_001039547_chr3 GK5
## 7: NM_000175_chr19 GPI
## 8: NM_000289_chr12 PFKM
## 9: NM_002626_chr21 PFKL
## 10: NM_002627_chr10 PFKP
## 11: NM_001145443_chr10 PFKFB3
## 12: NM_001127628_chr9 FBP1
## 13: NM_000034_chr16 ALDOA
## 14: NM_005165_chr17 ALDOC
## 15: NM_000365_chr12 TPI1
## 16: NM_002046_chr12 GAPDH
## 17: NM_014364_chr19 GAPDHS
## 18: NM_000291_chrX PGK1
## 19: NM_002629_chr10 PGAM1
## 20: NM_000290_chr7 PGAM2
## 21: NM_001428_chr1 ENO1
## 22: NM_001975_chr12 ENO2
## 23: NM_001193503_chr17 ENO3
## 24: NM_002654_chr15 PKM2
## 25: NM_001135239_chr11 LDHA
## 26: NM_001174097_chr12 LDHB
## 27: NM_001018073_chr14 PCK2
## 28: NM_000920_chr11 PC
## TranscriptID GeneName
dat <- PSOS[,c(2,5:22)]
gly <- dat[(dat$GeneName %in% glycolysis_genes),]
gly <- gly[gly[, .I[which.min(`PS24-OS24_logFC`)], by=GeneName]$V1]
mel <- melt(gly[,c(1,2,4,6,8,10,12,14,16,18)], id = 1)
NUM <- NULL
for(i in 1:nrow(mel)){
SPL <- strsplit(as.character(mel$variable)[i], split = "_")
SPL <- strsplit(SPL[[1]][1], split = "-")[[1]][1]
SPL <- strsplit(SPL, split = "")[[1]]
SPL <- SPL[3:length(SPL)]
SPL <- paste(SPL, collapse = "")
NUM[i] <- as.numeric(SPL)
}
mel$time <- NUM
pirateplot(formula = value ~ time,
data = mel,
xlab = "Time (hr)",
ylab = "Fold Change")
mel$time <- as.factor(mel$time)
p<-ggplot(mel, aes(x=time, y=value, fill=time)) +
geom_boxplot() +
theme_pubr();p
#### Subset out columns and glycolysis genes of interest ####
dat <- PSOS[,c(2,17:22)]
gly <- dat[(dat$GeneName %in% glycolysis_genes),]
gly <- gly[order(gly$`PS24-OS24_logFC`),]
gly <- gly[!gly$GeneName == "GK",]
gly <- gly[!gly$GeneName == "GK5",]
gly <- gly[!gly$GeneName == "PCK2",]
gly <- gly[!gly$GeneName == "GAPDHS",]
gly <- gly[!gly$GeneName == "PC",]
#### Average the last three time points ####
gly$average <- ave(gly$`PS16-OS16_logFC`, gly$`PS20-OS20_logFC`, gly$`PS24-OS24_logFC`)
gly <- gly[gly[, .I[which.min(average)], by=GeneName]$V1]
gly <- gly[order(gly$average),]
#### group genes according to fold change direction
group <- NULL
for(i in 1:nrow(gly)){
if(gly$average[i] < 0){
group[i] <- "LESS"
}else{
group[i] <- "MORE"
}
}
#group
gly$group <- group
setnames(gly, c(colnames(gly)), c("GeneName", "FC16", "16pval", "FC20", "20pval", "FC24", "24pval", "average", "group"))
p <- ggbarplot(gly, x = "GeneName", y = "average",
fill = "group", # change fill color by cyl
color = "white", # Set bar border colors to white
palette = "npg", # jco journal color palett. see ?ggpar
sort.val = "asc", # Sort the value in dscending order
sort.by.groups = TRUE, # Sort inside each group
x.text.angle = 90 # Rotate vertically x axis texts
);p
FC <- PSOS[,c(2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21)]
regulatory_genes <- c("HK1","HK2", "HK3", "PFKM", "PFKL", "PFKP")
FC <- FC[(FC$GeneName %in% regulatory_genes)]
FC <- FC[FC[, .I[which.min(`PS24-OS24_logFC`)], by=GeneName]$V1]
mel <- melt(FC, id = 1)
NUM <- NULL
for(i in 1:nrow(mel)){
SPL <- strsplit(as.character(mel$variable)[i], split = "_")
SPL <- strsplit(SPL[[1]][1], split = "-")[[1]][1]
SPL <- strsplit(SPL, split = "")[[1]]
SPL <- SPL[3:length(SPL)]
SPL <- paste(SPL, collapse = "")
NUM[i] <- as.numeric(SPL)
}
#NUM
mel$time <- NUM
mel$time <- factor(mel$time)
#brewer.pal(9,"Oranges")
p <- ggplot(data=mel, aes(x=time, y=value, group=GeneName))+
geom_line(size = 1.25, colour = "gray") +
geom_point(shape = 21, size = 2, colour = "black", fill = "gray")+
theme_pubr()+
geom_line(data = subset(mel, GeneName == "HK1"),
aes(x=`time`, y=value), size = 1.25, colour = "#E31A1C")+
geom_point(data = subset(mel, GeneName == "HK1"),
aes(x=`time`, y=value), shape = 21, size = 2, colour = "black", fill = "#E31A1C");p
FC <- PSOS[,c(2, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21)]
regulatory_genes <- c("HK1","HK2", "HK3", "PFKM", "PFKL", "PFKP")
FC <- FC[(FC$GeneName %in% regulatory_genes)]
#### Average ####
FC$average <- rowMeans(FC[,c(2:11)])
FC <- FC[FC[, .I[which.min(`average`)], by=GeneName]$V1]
dt2 <- FC[,c(1, 8:11)]
dt2 <- melt(dt2, id = 1)
#### generate plot ####
dt2$GeneName <- factor(dt2$GeneName, levels= c("HK1", "PFKM", "PFKP", "HK2", "PFKL"),labels = c("HK1", "PFKM", "PFKP", "HK2", "PFKL"))
# my_comparisons <- list( c("hr2", "hr8"), c("hr8", "hr16"), c("hr2", "hr16") )
p<- ggplot(dt2, aes(x=GeneName, y=value, fill=GeneName)) +
geom_violin(width = 1)+ #fill = c("#1B9E77", "#D95F02", "#7570B3")
# geom_dotplot(binaxis='y', stackdir='center', fill = "black", dotsize = .5)+
scale_fill_brewer(palette="Dark2")+
geom_boxplot(fill = c("grey"), width=0.2)+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black")+ #, fill = "black"
# geom_jitter(data= dt2, aes(x = GeneName, y = value), shape = 1, width = .05, fill = "grey")+
theme_pubr()#+
p
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "_")
DATA <- DATA[DATA$Experiment == "PSOS",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("PS", mea$group),]$group, split = "_")); name <- spl[!grepl("PS", spl)]
DT <- data.table(Gene = name,
PS = mea[grep("PS", mea$group),]$diff,
OS = mea[grep("OS", mea$group),]$diff,
FC = 2^-(mea[grep("PS", mea$group),]$diff-mea[grep("OS", mea$group),]$diff))
# Take means
DT[, means := mean(FC), by = Gene]
DT$log2FC <- log2(DT$FC)
DT
## Gene PS OS FC means log2FC
## 1: HK1 8.105000 5.863333 0.2114419 0.2523570 -2.2416667
## 2: HK1 8.913333 7.256667 0.3171711 0.2523570 -1.6566667
## 3: HK1 7.903333 5.773333 0.2284579 0.2523570 -2.1300000
## 4: GPI1 8.466667 7.123333 0.3941090 0.3857469 -1.3433333
## 5: GPI1 8.880000 7.486667 0.3806842 0.3857469 -1.3933333
## 6: GPI1 8.390000 7.003333 0.3824474 0.3857469 -1.3866667
## 7: PFKM 11.536667 10.296667 0.4233727 0.4514453 -1.2400000
## 8: PFKM 10.020000 9.013333 0.4976948 0.4514453 -1.0066667
## 9: PFKM 6.740000 5.533333 0.4332685 0.4514453 -1.2066667
## 10: ALDOA 8.990000 8.046667 0.5200300 0.5128989 -0.9433333
## 11: ALDOA 9.006667 8.026667 0.5069797 0.5128989 -0.9800000
## 12: ALDOA 8.483333 7.516667 0.5116869 0.5128989 -0.9666667
## 13: TPI1 6.156667 4.120000 0.2437262 0.2339326 -2.0366667
## 14: TPI1 7.196667 5.040000 0.2242739 0.2339326 -2.1566667
## 15: TPI1 8.940000 6.843333 0.2337978 0.2339326 -2.0966667
## 16: GAPDH 7.296667 6.253333 0.4852051 0.4546324 -1.0433333
## 17: GAPDH 9.003333 7.833333 0.4444213 0.4546324 -1.1700000
## 18: GAPDH 6.366667 5.163333 0.4342707 0.4546324 -1.2033333
## 19: PGK1 8.870000 8.176667 0.6184233 0.6649476 -0.6933333
## 20: PGK1 4.743333 4.188333 0.6806571 0.6649476 -0.5550000
## 21: PGK1 4.290000 3.766667 0.6957624 0.6649476 -0.5233333
## 22: PGAM1 8.903333 7.883333 0.4931164 0.4955302 -1.0200000
## 23: PGAM1 6.950000 5.900000 0.4829682 0.4955302 -1.0500000
## 24: PGAM1 6.870000 5.900000 0.5105061 0.4955302 -0.9700000
## 25: ENO3 9.000000 7.953333 0.4840853 0.4782967 -1.0466667
## 26: ENO3 4.583333 3.550000 0.4885800 0.4782967 -1.0333333
## 27: ENO3 4.926667 3.813333 0.4622248 0.4782967 -1.1133333
## 28: PKM2 6.790000 5.956667 0.5612310 0.5549115 -0.8333333
## 29: PKM2 8.783333 7.890000 0.5383688 0.5549115 -0.8933333
## 30: PKM2 8.150000 7.326667 0.5651347 0.5549115 -0.8233333
## 31: LDHA 6.480000 4.926667 0.3407219 0.3313080 -1.5533333
## 32: LDHA 8.483333 6.826667 0.3171711 0.3313080 -1.6566667
## 33: LDHA 9.716667 8.143333 0.3360311 0.3313080 -1.5733333
## Gene PS OS FC means log2FC
DT$Gene <- factor(DT$Gene, levels= c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"),labels = c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"))
# my_comparisons <- list( c("hr2", "hr8"), c("hr8", "hr16"), c("hr2", "hr16") )
p<- ggplot(DT, aes(x=Gene, y=log2FC)) + # , fill=Gene
# geom_violin(width = 1.8, fill = c("#1B9E77"))+ #, "#D95F02", "#7570B3"
# # scale_fill_brewer(palette="Dark2")+
geom_boxplot(fill = c("#1B9E77"), width=0.9)+
# geom_bar(stat="identity") + #fill="skyblue", alpha=0.5
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black", fill = "black")+
theme_pubr()#+
# ylim(c(0, 2))
p
Download reference genome and GTF file.
#### Download human GFF/GTF and genome (fasta) file
URL <- "ftp://ftp.ensembl.org/pub/release-95/gtf/homo_sapiens/Homo_sapiens.GRCh38.95.gtf.gz"
download.file(URL, destfile = "./data/GTF.gtf.gz")
URLGEN <- "ftp://ftp.ensembl.org/pub/release-95/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
download.file(URLGEN, destfile = "./data/genome.fa.gz")
Read param file and targets file.
#### Designate param path ####
parampath <- paste(homedir, "/results/hisat2PE.param", sep = "")
read.delim(parampath, comment.char = "#")
## PairSet Name Value
## 1 modules <NA> hisat2/2.0.1
## 2 software <NA> hisat2
## 3 cores -p 7
## 4 other <NA> -k 1 --min-intronlen 30 --max-intronlen 3000
## 5 outfile1 -S <FileName1>
## 6 outfile1 path ./results/
## 7 outfile1 remove <NA>
## 8 outfile1 append .hisat.sam
## 9 outfile1 outextension .hisat.bam
## 10 reference <NA> ./data/genome.fasta
## 11 infile1 -U <FileName1>
## 12 infile1 path <NA>
## 13 infile2 -U <FileName2>
## 14 infile2 path <NA>
#### Check targets.txt file ####
targets <- read.delim("./results/1_targetsPE_chip.txt", comment.char ="#")
targets#[1:3,]
## FileName1 FileName2 SampleName Factor SampleLong Experiment Date SampleReference
## 1 ./data/0hr-1_S1_L008_R1_001.fastq.gz ./data/0hr-1_S1_L008_R2_001.fastq.gz static1 static static 1 20-Dec-2019 NA
## 2 ./data/0hr-2_S2_L008_R1_001.fastq.gz ./data/0hr-2_S2_L008_R2_001.fastq.gz static2 static static 1 20-Dec-2019 NA
## 3 ./data/0hr-3_S3_L008_R1_001.fastq.gz ./data/0hr-3_S3_L008_R2_001.fastq.gz static3 static static 1 20-Dec-2019 NA
## 4 ./data/2hr-OS1_S4_L008_R1_001.fastq.gz ./data/2hr-OS1_S4_L008_R2_001.fastq.gz OS_2hr1 OS_2hr OS_2hr 1 20-Dec-2019 NA
## 5 ./data/2hr-OS2_S5_L008_R1_001.fastq.gz ./data/2hr-OS2_S5_L008_R2_001.fastq.gz OS_2hr2 OS_2hr OS_2hr 1 20-Dec-2019 NA
## 6 ./data/2hr-OS3_S6_L008_R1_001.fastq.gz ./data/2hr-OS3_S6_L008_R2_001.fastq.gz OS_2hr3 OS_2hr OS_2hr 1 20-Dec-2019 NA
## 7 ./data/2hr-PS1_S13_L008_R1_001.fastq.gz ./data/2hr-PS1_S13_L008_R2_001.fastq.gz PS_2hr1 PS_2hr PS_2hr 1 20-Dec-2019 NA
## 8 ./data/2hr-PS2_S14_L008_R1_001.fastq.gz ./data/2hr-PS2_S14_L008_R2_001.fastq.gz PS_2hr2 PS_2hr PS_2hr 1 20-Dec-2019 NA
## 9 ./data/2hr-PS3_S15_L008_R1_001.fastq.gz ./data/2hr-PS3_S15_L008_R2_001.fastq.gz PS_2hr3 PS_2hr PS_2hr 1 20-Dec-2019 NA
## 10 ./data/8hr-OS1_S7_L008_R1_001.fastq.gz ./data/8hr-OS1_S7_L008_R2_001.fastq.gz OS_8hr1 OS_8hr OS_8hr 1 20-Dec-2019 NA
## 11 ./data/8hr-OS2_S8_L008_R1_001.fastq.gz ./data/8hr-OS2_S8_L008_R2_001.fastq.gz OS_8hr2 OS_8hr OS_8hr 1 20-Dec-2019 NA
## 12 ./data/8hr-OS3_S9_L008_R1_001.fastq.gz ./data/8hr-OS3_S9_L008_R2_001.fastq.gz OS_8hr3 OS_8hr OS_8hr 1 20-Dec-2019 NA
## 13 ./data/8hr-PS1_S16_L008_R1_001.fastq.gz ./data/8hr-PS1_S16_L008_R2_001.fastq.gz PS_8hr1 PS_8hr PS_8hr 1 20-Dec-2019 NA
## 14 ./data/8hr-PS2_S17_L008_R1_001.fastq.gz ./data/8hr-PS2_S17_L008_R2_001.fastq.gz PS_8hr2 PS_8hr PS_8hr 1 20-Dec-2019 NA
## 15 ./data/8hr-PS3_S18_L008_R1_001.fastq.gz ./data/8hr-PS3_S18_L008_R2_001.fastq.gz PS_8hr3 PS_8hr PS_8hr 1 20-Dec-2019 NA
## 16 ./data/16hr-OS1_S10_L008_R1_001.fastq.gz ./data/16hr-OS1_S10_L008_R2_001.fastq.gz OS_16hr1 OS_16hr OS_16hr 1 20-Dec-2019 NA
## 17 ./data/16hr-OS2_S11_L008_R1_001.fastq.gz ./data/16hr-OS2_S11_L008_R2_001.fastq.gz OS_16hr2 OS_16hr OS_16hr 1 20-Dec-2019 NA
## 18 ./data/16hr-OS3_S12_L008_R1_001.fastq.gz ./data/16hr-OS3_S12_L008_R2_001.fastq.gz OS_16hr3 OS_16hr OS_16hr 1 20-Dec-2019 NA
## 19 ./data/16hr-PS1_S19_L008_R1_001.fastq.gz ./data/16hr-PS1_S19_L008_R2_001.fastq.gz PS_16hr1 PS_16hr PS_16hr 1 20-Dec-2019 NA
## 20 ./data/16hr-PS2_S20_L008_R1_001.fastq.gz ./data/16hr-PS2_S20_L008_R2_001.fastq.gz PS_16hr2 PS_16hr PS_16hr 1 20-Dec-2019 NA
## 21 ./data/16hr-PS3_S21_L008_R1_001.fastq.gz ./data/16hr-PS3_S21_L008_R2_001.fastq.gz PS_16hr3 PS_16hr PS_16hr 1 20-Dec-2019 NA
## 22 ./data/Undetermined_S0_L008_R1_001.fastq.gz ./data/Undetermined_S0_L008_R2_001.fastq.gz input input input 1 20-Dec-2019 NA
#### Make sure the text file is prepared correctly. Should return "TRUE" for all three checks ####
identical(as.character(targets$SampleName), make.names(as.character(targets$SampleName), unique = TRUE)) # Make approperiate names # make.names(c("a and b", "a-and-b"), unique = TRUE)
## [1] TRUE
identical(as.character(targets$Factor), make.names(as.character(targets$Factor), unique = FALSE))
## [1] TRUE
identical(targets$Factor, targets$SampleLong)
## [1] TRUE
targets$Factor
## [1] "static" "static" "static" "OS_2hr" "OS_2hr" "OS_2hr" "PS_2hr" "PS_2hr" "PS_2hr" "OS_8hr" "OS_8hr" "OS_8hr" "PS_8hr" "PS_8hr" "PS_8hr" "OS_16hr" "OS_16hr" "OS_16hr" "PS_16hr" "PS_16hr" "PS_16hr" "input"
targets$SampleLong
## [1] "static" "static" "static" "OS_2hr" "OS_2hr" "OS_2hr" "PS_2hr" "PS_2hr" "PS_2hr" "OS_8hr" "OS_8hr" "OS_8hr" "PS_8hr" "PS_8hr" "PS_8hr" "OS_16hr" "OS_16hr" "OS_16hr" "PS_16hr" "PS_16hr" "PS_16hr" "input"
Align reads to genome
args <- systemArgs(sysma=parampath, mytargets="./text_files/1_targetsPE_chip.txt")
#### 1 Index reference genome ####
system("hisat2-build ./data/genome.fasta ./data/genome.fasta")
#### 2 Align all FASTq files with hisat2 on a single computer
bampaths <- runCommandline(args=args)
#### Generate an allignment summary
(read_statsDF <- alignStats(args=args))#[1:8,]
write.table(read_statsDF, "./results/2 alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
writeTargetsout(x=args, file="./text_files/2_targets_bam.txt", overwrite=TRUE)
## FileName Nreads2x Nalign Perc_Aligned Nalign_Primary Perc_Aligned_Primary
## 1: static1 11613157 4849042 41.75473 4849042 41.75473
## 2: static2 36574442 28448522 77.78252 28448522 77.78252
## 3: static3 46772826 36310134 77.63083 36310134 77.63083
## 4: OS_2hr1 43938578 26709760 60.78886 26709760 60.78886
## 5: OS_2hr2 27796976 17973378 64.65947 17973378 64.65947
## 6: OS_2hr3 25620326 11935347 46.58546 11935347 46.58546
## 7: PS_2hr1 21393002 14027398 65.57003 14027398 65.57003
## 8: PS_2hr2 34557578 23730867 68.67052 23730867 68.67052
## 9: PS_2hr3 26192074 16464010 62.85875 16464010 62.85875
## 10: OS_8hr1 33100262 24308343 73.43852 24308343 73.43852
## 11: OS_8hr2 28767116 20900335 72.65356 20900335 72.65356
## 12: OS_8hr3 35493298 20998460 59.16176 20998460 59.16176
## 13: PS_8hr1 40123484 31096219 77.50129 31096219 77.50129
## 14: PS_8hr2 30308654 21741341 71.73311 21741341 71.73311
## 15: PS_8hr3 34816504 27340454 78.52728 27340454 78.52728
## 16: OS_16hr1 37267074 28337098 76.03789 28337098 76.03789
## 17: OS_16hr2 34986902 23362457 66.77487 23362457 66.77487
## 18: OS_16hr3 48378964 38664230 79.91951 38664230 79.91951
## 19: PS_16hr1 19909402 13100633 65.80124 13100633 65.80124
## 20: PS_16hr2 36472236 24789415 67.96791 24789415 67.96791
## 21: PS_16hr3 31003294 18494119 59.65211 18494119 59.65211
## 22: input 37100936 18615299 50.17474 18615299 50.17474
## FileName Nreads2x Nalign Perc_Aligned Nalign_Primary Perc_Aligned_Primary
Conduct peak calling.
#### Merge BAM files of replicates prior to peak calling ####
args <- systemArgs(sysma=NULL, mytargets="./text_files/2_targets_bam.txt")
args_merge <- mergeBamByFactor(args, overwrite=TRUE)
#### Peak calling without input/reference sample #### with MACS2
args <- systemArgs(sysma="param/macs2_noinput.param", mytargets="./text_files/3_targets_mergeBamByFactor.txt")
sysargs(args)[1] # Command-line parameters for first FASTQ file
runCommandline(args)
writeTargetsout(x=args, file="./text_files/5_targets_macs.txt", overwrite=TRUE)
#### Identify consensus peaks ####
source("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/My_R_Scripts/rangeoverlapper.R") # olRanges function
# peak_M1A <- outpaths(args_input)#["OS1"]#["M1A"]
peak_M1A <- outpaths(args)[6]
peak_M1A
peak_M1A <- as(read.delim(peak_M1A, comment="#")[,1:3], "GRanges")
peak_A1A <- outpaths(args)[7]#["A1A"]
peak_A1A
peak_A1A <- as(read.delim(peak_A1A, comment="#")[,1:3], "GRanges")
(myol1 <- subsetByOverlaps(peak_M1A, peak_A1A, minoverlap=1)) # Returns any overlap
myol2 <- olRanges(query=peak_M1A, subject=peak_A1A, output="gr") # Returns any overlap with OL length information
myol2[values(myol2)["OLpercQ"][,1]>=50] # Returns only query peaks with a minimum overlap of 50%
#### export to dataframe and save ####
genes_df = as(myol2, "data.frame")
write.table(genes_df, file = "./results/3 overlapping_granges.xls", quote=FALSE, row.names=FALSE, col.names=TRUE, sep="\t")
genes_df
## seqnames start end width strand space Qindex Sindex Qstart Qend Sstart Send OLstart OLend OLlength OLpercQ OLpercS OLtype
## 1: 1 9980 10436 457 * 1 1 1 9980 10436 9980 10262 9980 10262 283 61.92560 100.00000 inside
## 2: 1 199222 200270 1049 * 1 13 4 199222 200270 199705 199904 199705 199904 200 19.06578 100.00000 inside
## 3: 1 432556 432825 270 * 1 21 6 432556 432825 432633 432881 432633 432825 193 71.48148 77.51004 oldown
## 4: 1 528525 528732 208 * 1 23 7 528525 528732 528434 528658 528525 528658 134 64.42308 59.55556 olup
## 5: 1 629099 630054 956 * 1 25 10 629099 630054 629102 630042 629102 630042 941 98.43096 100.00000 inside
## ---
## 7570: Y 56847043 56847326 284 * Y 169465 27645 56847043 56847326 56847131 56847350 56847131 56847326 196 69.01408 89.09091 oldown
## 7571: Y 56849405 56849626 222 * Y 169467 27646 56849405 56849626 56849268 56849526 56849405 56849526 122 54.95495 47.10425 olup
## 7572: Y 56850416 56850886 471 * Y 169468 27647 56850416 56850886 56850688 56850964 56850688 56850886 199 42.25053 71.84116 oldown
## 7573: Y 56880180 56880492 313 * Y 169470 27648 56880180 56880492 56880188 56880387 56880188 56880387 200 63.89776 100.00000 inside
## 7574: Y 56884985 56885250 266 * Y 169471 27649 56884985 56885250 56885073 56885284 56885073 56885250 178 66.91729 83.96226 oldown
Peak annotation with ChIPpeakAnno package without input/reference sample.
args <- systemArgs(sysma="./param/annotate_peaks.param", mytargets="./text_files/5_targets_macs.txt")
# txdb <- makeTxDbFromGFF(file="data/GTF.gtf.gz", format="gtf", dataSource="ensembl", organism="Homo sapiens")
# saveDb(txdb, file="./data/GTF.sqlite")
txdb <- loadDb("./data/GTF.sqlite")
ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type"))
for(i in seq(along=args)){
peaksGR <- as(read.delim(infile1(args)[i], comment="#"), "GRanges")
annotatedPeak <- suppressWarnings(annotatePeakInBatch(peaksGR, AnnotationData=genes(txdb)))
# df <- data.frame(as.data.frame(annotatedPeak), as.data.frame(values(ge[values(annotatedPeak)$feature,]))) # Must remove any NA values from the feature column
df <- data.frame(as.data.frame(annotatedPeak[!is.na(values(annotatedPeak)$feature)]), as.data.frame(values(annotatedPeak)$feature[!is.na(values(annotatedPeak)$feature)]))
spl <- str_split(outpaths(args[i]), ".xls")[[1]][1]
out <- paste(spl, "peakAnno.xls", sep = "")
write.table(df, out, quote=FALSE, row.names=FALSE, sep="\t")
# write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t")
}
writeTargetsout(x=args, file="./text_files/6_targets_peakanno.txt", overwrite=TRUE)
#### Peak annotation with ChIPseeker package #### http://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html
library(stringr)
for(i in seq(along=args)) {
peakAnno <- annotatePeak(infile1(args)[i], TxDb=txdb, verbose=FALSE)
df <- as.data.frame(peakAnno)
# spl <- str_split(outpaths(args[i]), ".xls")[[1]][1]
# out <- paste(spl, "ChiPseeker.xls", sep = "")
# write.table(df, out, quote=FALSE, row.names=FALSE, sep="\t")
write.table(df, outpaths(args[i]), quote=FALSE, row.names=FALSE, sep="\t")
}
writeTargetsout(x=args, file="./text_files/7_targets_CHiPseeker.txt", overwrite=TRUE)
args <- systemArgs(sysma="./results/annotate_peaks.param", mytargets="./results/5_targets_macs_windows.txt")
txdb <- loadDb("./results/GTF.sqlite")
###############################################
#### Summary plots with ChIPseeker package ####
###############################################
#### generate promoter annotation and tag Matrix ####
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
tagMatrixList <- lapply(infile1(args), getTagMatrix, windows=promoter)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), facet="row")
tagHeatmap(tagMatrixList, xlim=c(-3000, 3000), color=NULL)
#### Count reads overlapping peaks
library(GenomicRanges)
# args <- systemArgs(sysma = "./param/count_rangesets.param", mytargets = "./text_files/5_targets_macs.txt")
args <- systemArgs(sysma = "./param/count_rangesets.param", mytargets = "./text_files/5_targets_macs_windows.txt")
args_bam <- systemArgs(sysma = NULL, mytargets = "./text_files/2_targets_bam_windows.txt")
# args_bam <- systemArgs(sysma = NULL, mytargets = "./text_files/2_targets_bam.txt")
bfl <- BamFileList(outpaths(args_bam), yieldSize = 50000, index = character())
countDFnames <- countRangeset(bfl, args, mode = "Union", ignore.strand = TRUE)
writeTargetsout(x = args, file = "./text_files/8_targets_countDF.txt", overwrite = TRUE)
#### Differential binding analysis
# args_diff <- systemArgs(sysma = "./param/rundiff.param", mytargets = "./text_files/8_targets_countDF.txt")
args_diff <- systemArgs(sysma = "./param/rundiff.param", mytargets = "./text_files/8_targets_countDF.txt")
cmp <- readComp(file = args_bam, format = "matrix")
dbrlist <- runDiff(args = args_diff, diffFct = run_edgeR, targets = targetsin(args_bam),
cmp = cmp[[1]], independent = TRUE, dbrfilter = c(Fold = 2, FDR = 1))
writeTargetsout(x = args_diff, file = "9_targets_rundiff.txt", overwrite = TRUE)
Convert Bam files to BigWig files
dir.create("./results/bigwig_files")
#### input files ####
bamfile1 <- "./results/0hr-1_S1_L008_R1_001.fastq.gz.hisat_static.bam"
bamfile2 <- "./results/2hr-OS1_S4_L008_R1_001.fastq.gz.hisat_OS_2hr.bam"
bamfile3 <- "./results/2hr-PS1_S13_L008_R1_001.fastq.gz.hisat_PS_2hr.bam"
bamfile4 <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam"
bamfile5 <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam"
bamfile6 <- "./results/16hr-OS1_S10_L008_R1_001.fastq.gz.hisat_OS_16hr.bam"
bamfile7 <- "./results/16hr-PS1_S19_L008_R1_001.fastq.gz.hisat_PS_16hr.bam"
bamfile8 <- "./results/Undetermined_S0_L008_R1_001.fastq.gz.hisat.bam"
#### output files ####
BWfile1 <- "./results/bigwig_files/0hr-1_S1_L008_R1_001.fastq.gz.hisat_static.bw"
BWfile2 <- "./results/bigwig_files/2hr-OS1_S4_L008_R1_001.fastq.gz.hisat_OS_2hr.bw"
BWfile3 <- "./results/bigwig_files/2hr-PS1_S13_L008_R1_001.fastq.gz.hisat_PS_2hr.bw"
BWfile4 <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw"
BWfile5 <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw"
BWfile6 <- "./results/bigwig_files/16hr-OS1_S10_L008_R1_001.fastq.gz.hisat_OS_16hr.bw"
BWfile7 <- "./results/bigwig_files/16hr-PS1_S19_L008_R1_001.fastq.gz.hisat_PS_16hr.bw"
BWfile8 <- "./results/bigwig_files/Undetermined_S0_L008_R1_001.fastq.gz.hisat.bw"
#### generate BigWig files ####
alignment <- readGAlignments(bamfile1); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile1)
alignment <- readGAlignments(bamfile2); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile2)
alignment <- readGAlignments(bamfile3); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile3)
alignment <- readGAlignments(bamfile4); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile4)
alignment <- readGAlignments(bamfile5); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile5)
alignment <- readGAlignments(bamfile6); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile6)
alignment <- readGAlignments(bamfile7); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile7)
alignment <- readGAlignments(bamfile8); reads_coverage <- coverage(alignment); export.bw(reads_coverage, con = BWfile8)
Plot peaks for 8 hour time points in glycolysis genes
library(rtracklayer)
#### HK1 ####
#############
# chr10 69266000-69405881
chr <- "10"
start <- 69269991 - 5000 #69266000
end <- 69269991 + 5000 #69405881
p1 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### GPI ####
#############
# chr19 34360740-34406413
chr <- "19"
start <- 34353330 - 5000 #34360740
end <- 34353330 + 5000 #34406413
p2 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### PFKM ####
##############
# chr12:48101253-48150404
chr <- "12"
start <- 48105253 - 5000 # 48101253
end <- 48105253 + 5000 # 48150404
p3 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### ALDOA ####
###############
# chr16:30061760-30076207
chr <- "16"
start <- 30064279 - 5000 #30061760
end <- 30064279 + 5000 #30076207
p4 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### TPI1 ####
##############
# chr12:6863420-6874946
chr <- "12"
start <- 6866834 - 5000 # 6863420
end <- 6866834 + 5000 # 6874946
p5 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### GAPDH ####
###############
# chr12:6530517-6542371
chr <- "12"
start <- 6534517 -5000 # 6530517
end <- 6534517 + 5000 # 6542371
p6 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### PGK1 ####
##############
# chrX:78100248-78133295
chr <- "x"
start <- 78104248 - 5000 # 78100248
end <- 78104248 + 5000 # 78133295
p6 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### PGAM1 ####
###############
# chr10:97422191-97437444
chr <- "10"
start <- 97426191 - 5000 # 97422191
end <- 97426191 + 5000 # 97437444
p7 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### ENO3 ####
##############
# chr17:4947119-4961129
chr <- "17"
start <- 70653125 - 5000 # 4947119
end <- 70653125 + 5000 # 4961129
p8 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### PKM ####
#############
# chr15:72195029-72235624
chr <- "15"
start <- 72199029 - 5000 # 72195029
end <- 72199029 + 5000 # 72235624
p9 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
#### LDHA ####
##############
# chr11:18390563-18412425
chr <- "11"
start <- 18394389 - 5000 # 18390563
end <- 18394389 + 5000 # 18412425
p10 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "all")
multiplot(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, cols = 1)
Plot only called peaks for 8 hour time point. HK1, GPI and ENO3 had no called peaks in their promoters
#### PFKM ####
##############
# chr12:48101253-48150404
chr <- "12"
start <- 48105253 - 5000 # 48101253
end <- 48105253 + 5000 # 48150404
p3 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "PFKM",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### ALDOA ####
###############
# chr16:30061760-30076207
chr <- "16"
start <- 30064279 - 5000 #30061760
end <- 30064279 + 5000 #30076207
p4 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "ALDOA",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### TPI1 ####
##############
# chr12:6863420-6874946
chr <- "12"
start <- 6866834 - 5000 # 6863420
end <- 6866834 + 5000 # 6874946
p5 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "TPI1",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### GAPDH ####
###############
# chr12:6530517-6542371
chr <- "12"
start <- 6534517 -5000 # 6530517
end <- 6534517 + 5000 # 6542371
p6 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "GAPDH",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### PGK1 ####
##############
# chrX:78100248-78133295
chr <- "X"
start <- 78104248 - 5000 # 78100248
end <- 78104248 + 5000 # 78133295
p7 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "PGK1",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### PGAM1 ####
###############
# chr10:97422191-97437444
chr <- "10"
start <- 97426191 - 5000 # 97422191
end <- 97426191 + 5000 # 97437444
p8 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "PGAM1",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### PKM ####
#############
# chr15:72195029-72235624
chr <- "15"
start <- 72199029 - 5000 # 72195029
end <- 72199029 + 5000 # 72235624
p10 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "PKM",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
#### LDHA ####
##############
# chr11:18390563-18412425
chr <- "11"
start <- 18394389 - 5000 # 18390563
end <- 18394389 + 5000 # 18412425
p11 <- ChiPseqPeakPlotter(treatment_bw <- "./results/bigwig_files/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw",
control_bw <- "./results/bigwig_files/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw",
chr <- chr,
start <- start,
end <- end,
average_dist <- 10,
fill_dist <- 10,
type = "called",
x_lab = "LDHA",
peaks_tx <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls",
peaks_ctrl <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bam_macs2_peaks.annotatedpeakAnno.xls")
multiplot(p1, p3, p4, p5, p6, p7, p8, p10, p11, cols = 1)
Plot entire time series data as density plots
#### load files
OS_bw2 <- "./results/2hr-OS1_S4_L008_R1_001.fastq.gz.hisat_OS_2hr.bw"
PS_bw2 <- "./results/2hr-PS1_S13_L008_R1_001.fastq.gz.hisat_PS_2hr.bw"
OS_bw8 <- "./results/8hr-OS1_S7_L008_R1_001.fastq.gz.hisat_OS_8hr.bw"
PS_bw8 <- "./results/8hr-PS1_S16_L008_R1_001.fastq.gz.hisat_PS_8hr.bw"
OS_bw16 <- "./results/16hr-OS1_S10_L008_R1_001.fastq.gz.hisat_OS_16hr.bw"
PS_bw16 <- "./results/16hr-PS1_S19_L008_R1_001.fastq.gz.hisat_PS_16hr.bw"
#### generate a data table containing all range data in a loop ####
###################################################################
genes <- c("PFKM", "HK1", "GPI", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM", "LDHA")
TSS <- c(48105253, 69269991, 34353330, 48105253, 30064279, 6866834, 6534517, 78104248, 97426191, 70653125, 72199029, 18394389)
chr <- c("12", "10", "19", "12", "16", "12", "12", "X", "10", "17", "15", "11")
scoreDF <- data.frame()
for(i in 1:length(genes)){
####
start <- TSS[i] - 5000
end<- TSS[i] + 5000
ch <- chr[i]
which <- GRanges(c(ch), IRanges(c(start), c(end)))
#### subset regions
bw_OS2 <- import(OS_bw2, which = which); bw_PS2 <- import(PS_bw2, which = which)
bw_OS8 <- import(OS_bw8, which = which); bw_PS8 <- import(PS_bw8, which = which)
bw_OS16 <- import(OS_bw16, which = which); bw_PS16 <- import(PS_bw16, which = which)
#### Get scores
sco <- score(bw_OS2); OS2 <- data.table(sco); OS2$time <- "OS_2hr"
sco <- score(bw_PS2); PS2 <- data.table(sco); PS2$time <- "PS_2hr"
sco <- score(bw_OS8); OS8 <- data.table(sco); OS8$time <- "OS_8hr"
sco <- score(bw_PS8); PS8 <- data.table(sco); PS8$time <- "PS_8hr"
sco <- score(bw_OS16); OS16 <- data.table(sco); OS16$time <- "OS_16hr"
sco <- score(bw_PS16); PS16 <- data.table(sco); PS16$time <- "PS_16hr"
#### combine files
dt2 <- rbind(OS2, PS2); dt2 <- rbind(dt2, OS8)
dt2 <- rbind(dt2, PS8); dt2 <- rbind(dt2, OS16)
dt2 <- rbind(dt2, PS16)
dt2$gene <- genes[i]
scoreDF <- rbind(scoreDF, dt2)
}
namedf <- scoreDF[!duplicated(scoreDF$gene),]$gene
for(i in 1:length(namedf)){
name <- namedf[i]
dt3 <- scoreDF[scoreDF$time == "PS_2hr" | scoreDF$time == "OS_2hr",]
dt4 <- scoreDF[scoreDF$time == "PS_8hr" | scoreDF$time == "OS_8hr",]
dt5 <- scoreDF[scoreDF$time == "PS_16hr" | scoreDF$time == "OS_16hr",]
dt3 <- dt3[dt3$gene == name,]
dt4 <- dt4[dt4$gene == name,]
dt5 <- dt5[dt5$gene == name,]
p1 <- ggplot(dt3, aes(x=sco, fill=time)) +
geom_density(alpha = 0.7)+
scale_fill_brewer(palette="Dark2")+
# scale_fill_manual(values=c("#9ECAE1", "#4292C6", "#08519C", "#FDAE6B", "#F16913", "#A63603"))+
ylim(c(0, 0.3))+
xlim(c(0,35))+
ggtitle(name)+
theme_pubr()
p2 <- ggplot(dt4, aes(x=sco, fill=time)) +
geom_density(alpha = 0.7)+
scale_fill_brewer(palette="Dark2")+
# scale_fill_manual(values=c("#9ECAE1", "#4292C6", "#08519C", "#FDAE6B", "#F16913", "#A63603"))+
ylim(c(0, 0.3))+
xlim(c(0,35))+
ggtitle(name)+
theme_pubr()
p3 <- ggplot(dt5, aes(x=sco, fill=time)) +
geom_density(alpha = 0.7)+
scale_fill_brewer(palette="Dark2")+
# scale_fill_manual(values=c("#9ECAE1", "#4292C6", "#08519C", "#FDAE6B", "#F16913", "#A63603"))+
ylim(c(0, 0.3))+
xlim(c(0,35))+
ggtitle(name)+
theme_pubr()
multiplot(p1, p2, p3, cols = 3)
}
Identify genes that are epigenetic regulators that respond to flow Mine GO terms Obtain GO information from BioMart
Values <- PSOS$GeneName
myMart <- useMart("ENSEMBL_MART_ENSEMBL", dataset= "mmusculus_gene_ensembl")
go <- getBM(attributes=c("ensembl_gene_id", "ensembl_transcript_id", "wikigene_name", "wikigene_description",
"strand", "go_id", "name_1006", "definition_1006"), mart=myMart, values = Values)
#### Merge with the Fold change Data and merge the GO columns together
setnames(go, c(colnames(go)), c("ensembl_gene_id", "ensembl_transcript_id", "GeneName", "wikigene_description", "strand", "go_id",
"name_1006", "definition_1006"))
go$GeneName <- toupper(go$GeneName)
mergeDT <- merge(PSOS, go, by = "GeneName", allow.cartesian=TRUE)
mergeDT$mergecol <- paste(mergeDT$go_id, mergeDT$name_1006, mergeDT$definition_1006, sep = "@")
Terms <- c("EPIGENETIC")
mergeDT$mergecol <- toupper(mergeDT$mergecol)
enriched <- GOsorteR(DT = mergeDT, terms = Terms)
enriched <- enriched[!duplicated(enriched$GeneName),]
KLF4 was added back based on the following publication XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
KLF4 <- mergeDT[mergeDT$GeneName == "KLF4",]
KLF4 <- KLF4[(duplicated(KLF4$GeneName)),]
enriched <- rbind(enriched, KLF4)
save <- enriched[,c(1,2,65, 66,67, 68)]
save2 <- save
reg <- PSOS[(PSOS$GeneName %in% save2$GeneName),]
dim(reg)
## [1] 58 64
reg <- reg[!duplicated(reg$GeneName),]
mel <- melt(reg[,c(2,3,5,7,9,11,13,15,17,19,21)], id = 1)
NUM <- NULL
for(i in 1:nrow(mel)){
SPL <- strsplit(as.character(mel$variable)[i], split = "_")
SPL <- strsplit(SPL[[1]][1], split = "-")[[1]][1]
SPL <- strsplit(SPL, split = "")[[1]]
SPL <- SPL[3:length(SPL)]
SPL <- paste(SPL, collapse = "")
NUM[i] <- as.numeric(SPL)
}
#NUM
mel$time <- NUM
mel$time <- factor(mel$time)
#brewer.pal(9,"Oranges")
p <- ggplot(data=mel, aes(x=time, y=value, group=GeneName))+
geom_line(size = 1.25, colour = "gray") +
geom_point(shape = 21, size = 2, colour = "black", fill = "gray")+
theme_pubr()+
geom_line(data = subset(mel, GeneName == "KLF4"),
aes(x=`time`, y=value), size = 1.25, colour = "#E31A1C")+
geom_point(data = subset(mel, GeneName == "KLF4"),
aes(x=`time`, y=value), shape = 21, size = 2, colour = "black", fill = "#E31A1C");p
Load the sequence compiled mRNA database
mRNAs <- fread("./results/3 Annotated species mRNA compilation.xls")
setwd(homedir)
mRNAs$external_gene_name <- toupper(mRNAs$external_gene_name)
#### Return mRNAs for DEGs identified
ID2 <- toupper(glycolysis_genes)
TRANS_CHR <- mRNAs[(mRNAs$external_gene_name %in% ID2),]
TRANS_CHR[TRANS_CHR$external_gene_name == "GPI1",]$external_gene_name <- "GPI"
TRANS_CHR <- TRANS_CHR[order(TRANS_CHR$external_gene_name),]
#### Remove erroneous chromosome labels CHR_Labels <- c(1:21, "X", "Y")
TRANS_CHR <- TRANS_CHR[(TRANS_CHR$chromosome_name %in% CHR_Labels),]
#### Relabel chromosome designations for use with the getSeq Function
TRANS_CHR <- ChromLabel(TRANS_CHR)
#### Determine genomes to load and query
L <- TRANS_CHR[order(TRANS_CHR$Scientific_Name),]
L[!duplicated(L$Scientific_Name),]$Scientific_Name
#### Load available genomes
ToLoad <- c("Hsapiens.UCSC.hg38","Mmusculus.UCSC.mm10","Rnorvegicus.UCSC.rn6","Btaurus.UCSC.bosTau8",
"Cfamiliaris.UCSC.canFam3", "Dmelanogaster.UCSC.dm6", "Drerio.UCSC.danRer10","Ggallus.UCSC.galGal4",
"Mmulatta.UCSC.rheMac3","Ptroglodytes.UCSC.panTro3","Sscrofa.UCSC.susScr3","Tguttata.UCSC.taeGut2",
"Scerevisiae.UCSC.sacCer3")
GenomeLoader(ToLoad)
#### return the promoter sequences for the identified transcripts from each Species
genome <- c("Hsapiens.UCSC.hg38","Mmusculus.UCSC.mm10","Rnorvegicus.UCSC.rn6"
,"Btaurus.UCSC.bosTau8", "Cfamiliaris.UCSC.canFam3", "Dmelanogaster.UCSC.dm6"
,"Drerio.UCSC.danRer10","Mmulatta.UCSC.rheMac3","Ggallus.UCSC.galGal4"
,"Ptroglodytes.UCSC.panTro3"#,"Scerevisiae.UCSC.sacCer3"
,"Sscrofa.UCSC.susScr3")#,"Tguttata.UCSC.taeGut2")
PROMTrans <- ChromosomeSeqCompileR(DT = TRANS_CHR, Spec = genome, distance = 2000)
#### Remove sequences that contain a large number of N's
setnames(PROMTrans, c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name",
"external_gene_name", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Genome_Sequence"),
c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name", "Common_Name", "gene_symbol",
"transcription_start_site", "transcript_start", "transcript_end", "chromosome_name", "Sequence"))
PROMTrans <- SequenceSiftR(PROMTrans, Percent = 0.50, output = "return_remove")
#### Sort out Splice Variants so that the longest transcript of each protein is represented only once in each species
setnames(PROMTrans, c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name", "Common_Name",
"gene_symbol", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Sequence", "Percent_N"),
c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name", "external_gene_name",
"transcription_start_site", "transcript_start", "transcript_end", "chromosome_name",
"Genome_Sequence", "Percent_N"))
PROMTrans <- VariantSort(PROMTrans, variant = "MAX")
PROMTrans[,c(1, 4:10, 12:13), with = FALSE]
## ensembl_transcript_id Scientific_Name Common_Name external_gene_name transcription_start_site transcript_start transcript_end chromosome_name Percent_N Length
## 1: ENST00000564521 Homo_sapiens Human ALDOA 30064434 30064434 30070457 chr16 0 2534
## 2: ENST00000374855 Homo_sapiens Human ALDOB 101435823 101420578 101435823 chr9 0 2451
## 3: ENST00000226253 Homo_sapiens Human ALDOC 28577264 28573115 28577264 chr17 0 1982
## 4: ENST00000464920 Homo_sapiens Human ENO1 8871297 8861005 8871297 chr1 0 2266
## 5: ENST00000535366 Homo_sapiens Human ENO2 6915207 6915207 6923696 chr12 0 2827
## ---
## 258: ENSSSCT00000011507 Sus_scrofa Wild boar PGAM1 118326882 118326882 118336099 chr14 0 774
## 259: ENSSSCT00000018201 Sus_scrofa Wild boar PGAM2 53423634 53423634 53425884 chr18 0 808
## 260: ENSSSCT00000013604 Sus_scrofa Wild boar PGK1 71352751 71339754 71352751 chrX 0 1640
## 261: ENSSSCT00000002159 Sus_scrofa Wild boar PKM 65549984 65549984 65575391 chr7 0 2322
## 262: ENSSSCT00000000746 Sus_scrofa Wild boar TPI1 66278367 66274876 66278367 chr5 0 1267
##### Load and annotate the TX_factor data table
TXDT <- fread("./results/2019-3-22 TX factor data table for R BG.csv", header = TRUE)
TXDT <- TXDT[,.(prot, `MotifMap Degenerate consensus sequence`)]
TXDT$prot <- toupper(TXDT$prot)
TXDT$Consensus_Sequence_Medium <- IUPAC_Boolean(TXDT$`MotifMap Degenerate consensus sequence`, stringency = "medium")
setnames(TXDT, c("prot", "MotifMap Degenerate consensus sequence", "Consensus_Sequence_Medium"),
c("Targeting_Factor", "MotifMap Degenerate consensus sequence", "Consensus_Sequence"))
#### retain only pioneer transcription factors
key <- enriched[!duplicated(enriched$GeneName),]$GeneName
TXDT <- TXDT[(TXDT$Targeting_Factor %in% key),]
head(TXDT)
## Targeting_Factor MotifMap Degenerate consensus sequence Consensus_Sequence
## 1: BMI1 MGYKYCC (A|C)G(C|T)(G|T)(C|T)CC
## 2: BMI1 GGHVDSDS GG...(G|C).(G|C)
## 3: CTCF NNDCCACYAGRKGGCASYR ...CCAC(C|T)AG(A|G)(G|T)GGCA(G|C)(C|T)(A|G)
## 4: CTCF NBNRCCWSNAGRKGGCRSNV ...(A|G)CC(A|T)(G|C).AG(A|G)(G|T)GGC(A|G)(G|C)..
## 5: CTCF NNNBNNCCASNAGRKGGCRV ......CCA(G|C).AG(A|G)(G|T)GGC(A|G).
## 6: CTCF YNDCCASYAGRKGGCRSHV (C|T)..CCA(G|C)(C|T)AG(A|G)(G|T)GGC(A|G)(G|C)..
######################################################################
#### Perform the transcription factor query computations #############
######################################################################
setnames(PROMTrans, c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name",
"external_gene_name", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Genome_Sequence", "Percent_N", "Length"),
c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name",
"Common_Name", "gene_symbol", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Sequence", "Percent_N", "Length"))
TX_TOT_Species <- TFpredict(Target = PROMTrans, Targeting_Factor_DT = TXDT, type = "multiple_species")
#### Annotate the raw transcription factor hits DT with the IUPAC consensus score
DT25 <- fread("./results/24 Raw Transcription factor hits.xls")
DT25 <- DT25[,c(1:7, 9:12), with = FALSE]
IUPAC <- DT25$`MotifMap Degenerate consensus sequence`
DT25$Score <- IUPAC_ScoreR(IUPAC, stringency = "medium")
#### Retain only transcription factor promoter association conservation between HMR at each promoter
DT25 <- SpeciesTFCons(DT = DT25,Spec = c("Human", "Mouse", "Rat"), provide = "TF_Target")
head(DT25)[,c(1:10, 13), with = FALSE]
## Consensus_Sequence start end Number_Hits Targeting_Factor MotifMap Degenerate consensus sequence length Identified_sequence gene_symbol Species mergecol
## 1: ....G(G|T)AGT..... 2536 2549 3 EP300 NNVDGKAGTDNNNB 4001 GaaaaggagtacgtcA ALDOB Cattle EP300-ALDOB
## 2: ....G(G|T)AGT..... 3360 3373 3 EP300 NNVDGKAGTDNNNB 4001 CtccgggagttggtgA ALDOB Cattle EP300-ALDOB
## 3: ....G(G|T)AGT..... 3403 3416 3 EP300 NNVDGKAGTDNNNB 4001 TtcatggagtcgcaaA ALDOB Cattle EP300-ALDOB
## 4: CACCC 947 951 12 KLF2 CACCC 4001 AcacccA ALDOB Cattle KLF2-ALDOB
## 5: CACCC 947 951 12 KLF4 CACCC 4001 AcacccA ALDOB Cattle KLF4-ALDOB
## 6: CACCC 965 969 12 KLF2 CACCC 4001 TcacccT ALDOB Cattle KLF2-ALDOB
#### Determine the number of times each transcription factor consensus sequence is present for each promoter in human ## HMR conserved transcription factors
TX_ABUN <- TFRankR(DT = DT25, sortBy = "abundance", dec = TRUE)
TX_ABUN <- TX_ABUN[TX_ABUN$Species == "Human",]
TX_ABUN
## Number_Hits Targeting_Factor gene_symbol Species
## 1: 24 KLF2 GAPDH Human
## 2: 24 KLF4 GAPDH Human
## 3: 22 KLF2 FBP2 Human
## 4: 22 KLF4 FBP2 Human
## 5: 22 KLF2 PGAM2 Human
## ---
## 108: 1 MECP2 HK2 Human
## 109: 1 MECP2 LDHC Human
## 110: 1 MECP2 PFKFB3 Human
## 111: 1 MECP2 PGAM2 Human
## 112: 1 MECP2 PKM Human
g1 <- ggplot(data=TX_ABUN, aes(x=gene_symbol, y=Number_Hits, group=Targeting_Factor, colour=Targeting_Factor)) +
geom_linerange(data=TX_ABUN, mapping=aes(x=gene_symbol, ymin=0, ymax=Number_Hits), size = 2, position = position_dodge(width = 0.8))+
geom_point(size = 3, position = position_dodge(width = 0.8)) +
theme_pubr() +
ylab("Number of binding sites") +
xlab("mRNA") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
g1
g1 <- ggplot(data=TX_ABUN, aes(x=gene_symbol, y=Number_Hits, group=Targeting_Factor, colour=Targeting_Factor)) +
geom_line() +
geom_point() +
theme_pubr() +
ylab("Number of binding sites") +
xlab("mRNA") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
g1
#### Read in and clean up the data ####
DATA <- fread("./results/2 A2B1_pvalue KLF4_over.csv")
DATA$gene <- toupper(DATA$gene)
DATA <- DATA[,c(3,10,13)]
DATA <- DATA[!DATA$`log2(fold_change)` == "#NAME?",]
DATA <- DATA[!DATA$`log2(fold_change)` == "inf",]
DATA$`log2(fold_change)` <- as.numeric(DATA$`log2(fold_change)`)
#### Label all data groups ####
Interactors <- c("GCKR", "VDAC", "NOS", "GSK3B", "GSK3A", "CKB", "AK1")
gly <- DATA[(DATA$gene %in% glycolysis_genes),]
gly$group <- "Glycolysis"
int <- DATA[(DATA$gene %in% Interactors),]
int$group <- "Interactor"
nothing <- DATA[!(DATA$gene %in% glycolysis_genes),]
nothing <- nothing[!(nothing$gene %in% Interactors),]
nothing$group <- "All other"
dat <- rbind(gly, int, nothing)
identical(nrow(DATA), nrow(dat)); nrow(DATA) == nrow(dat)
## [1] TRUE
## [1] TRUE
setnames(dat, colnames(dat), c("gene", "FC", "qval", "group"))
dat$logqval <- -log10(dat$qval) #### Take the -log10 of the q-value
dat <- dat[!dat$logqval == 0,] #### Remove entries with a logqvalue of 0
dat <- dat[dat$logqval < 3.5,] #### Remove entries with a logqvalue less than 3.5
Generate a volcano plots
p <- ggplot(dat, aes(x=FC, y=logqval, colour = group)) +
geom_point(shape = 21, size = 2, colour = "black", fill = "#4575B4")+
theme_pubr() +
labs(x = "log2(FC)", y = "-log10(pval)") +
geom_point(data = subset(dat, gene == 'GCKR'),
aes(x=FC, y=logqval), shape = 21, size = 2, colour = "black", fill = "#D95F02") +
geom_point(data = subset(dat, group == 'Glycolysis'),
aes(x=FC, y=logqval), shape = 21, size = 2, colour = "black", fill = "#D7191C" );p
p <- ggplot(dat, aes(x=FC, y=logqval, colour = group)) +
geom_point(shape = 21, size = 2, colour = "grey", fill = "grey")+
theme_pubr() +
labs(x = "log2(FC)", y = "-log10(pval)") +
geom_point(data = subset(dat, group == 'Glycolysis'),
aes(x=FC, y=logqval), shape = 21, size = 4, colour = "black", fill = "#D7191C" );p
p <- ggplot(dat, aes(x=FC, y=logqval, colour = group)) +
geom_point(shape = 21, size = 2, colour = "grey", fill = "grey")+
theme_pubr() +
labs(x = "log2(FC)", y = "-log10(pval)") +
geom_point(data = subset(dat, gene == 'GCKR'),
aes(x=FC, y=logqval), shape = 21, size = 4, colour = "black", fill = "#D7191C");p
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "KLF4_overexpress",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("KLF4", mea$group),]$group, split = "@")); name <- spl[!grepl("KLF4", spl)]
DT <- data.table(Gene = name,
KLF4 = mea[grep("KLF4", mea$group),]$diff,
NUL = mea[grep("NULL", mea$group),]$diff,
FC = 2^-(mea[grep("KLF4", mea$group),]$diff-mea[grep("NULL", mea$group),]$diff))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene KLF4 NUL FC means
## 1: HK1 9.313333 8.113333 0.4352753 0.4363197
## 2: HK1 7.283333 6.141667 0.4532357 0.4363197
## 3: HK1 3.970000 2.720000 0.4204482 0.4363197
## 4: GPI1 3.970000 2.740000 0.4263174 0.4373357
## 5: GPI1 3.580000 2.336667 0.4223956 0.4373357
## 6: GPI1 2.120000 1.010000 0.4632940 0.4373357
## 7: PFKM 9.456667 8.056667 0.3789291 0.3777093
## 8: PFKM 6.466667 5.150000 0.4014614 0.3777093
## 9: PFKM 4.646667 3.143333 0.3527375 0.3777093
## 10: ALDOA 4.326667 3.300000 0.4908429 0.4796198
## 11: ALDOA 6.880000 5.740000 0.4537596 0.4796198
## 12: ALDOA 8.093333 7.076667 0.4942570 0.4796198
## 13: TPI1 5.000000 4.156667 0.5573543 0.5812818
## 14: TPI1 6.213333 5.470000 0.5973576 0.5812818
## 15: TPI1 5.090000 4.326667 0.5891336 0.5812818
## 16: GAPDH 6.646667 5.693333 0.5164379 0.5306635
## 17: GAPDH 6.276667 5.366667 0.5321851 0.5306635
## 18: GAPDH 3.490000 2.610000 0.5433674 0.5306635
## 19: PGK1 4.733333 3.926667 0.5717012 0.5437905
## 20: PGK1 4.166667 3.323333 0.5573543 0.5437905
## 21: PGK1 2.410000 1.416667 0.5023158 0.5437905
## 22: PGAM1 3.956667 3.173333 0.5810228 0.5829924
## 23: PGAM1 4.280000 3.460000 0.5664419 0.5829924
## 24: PGAM1 4.000000 3.266667 0.6015125 0.5829924
## 25: ENO3 4.493333 3.706667 0.5796819 0.5567709
## 26: ENO3 3.646667 2.750000 0.5371263 0.5567709
## 27: ENO3 3.683333 2.830000 0.5535044 0.5567709
## 28: PKM2 3.590000 2.733333 0.5522270 0.5634618
## 29: PKM2 3.800000 2.976667 0.5651347 0.5634618
## 30: PKM2 3.880000 3.076667 0.5730237 0.5634618
## 31: LDHA 5.413333 4.633333 0.5823668 0.5536801
## 32: LDHA 8.523333 7.580000 0.5200300 0.5536801
## 33: LDHA 5.690000 4.850000 0.5586436 0.5536801
## Gene KLF4 NUL FC means
DT$log2FC <- log2(DT$FC)
DT$Gene <- factor(DT$Gene, levels= c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"),labels = c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"))
# my_comparisons <- list( c("hr2", "hr8"), c("hr8", "hr16"), c("hr2", "hr16") )
p<- ggplot(DT, aes(x=Gene, y=log2FC)) + # , fill=Gene
# geom_violin(width = 1.8, fill = c("#1B9E77"))+ #, "#D95F02", "#7570B3"
# # scale_fill_brewer(palette="Dark2")+
geom_boxplot(fill = c("#1B9E77"), width=0.9)+
# geom_bar(stat="identity") + #fill="skyblue", alpha=0.5
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black", fill = "black")+
theme_pubr()#+
# ylim(c(0, 2))
p
DT <- fread("./results/PS OS hexokinase activity assay.xls")
DT <- DT[!DT$type == "Static",]
DT$type <- factor(DT$type, levels= c("PS", "OS"),labels = c("PS", "OS"))
my_comparisons <- list( c("PS", "OS") )
p<- ggplot(DT, aes(x=type, y=data, fill=type)) +
geom_violin(width = 1)+
scale_fill_brewer(palette="Dark2")+
geom_boxplot(fill = c("grey"), width=0.2)+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black")+
theme_pubr()+
stat_compare_means(comparisons = my_comparisons); p
Mine GO terms
# mergeDT <- fread("./results/10 FC GO merge.xls")
Terms <- c("GLYCOLYSIS", "GLUCOSE")
mergeDT$mergecol <- toupper(mergeDT$mergecol)
enriched <- GOsorteR(DT = mergeDT, terms = Terms)
enriched <- enriched[!duplicated(enriched$GeneName),]
save <- enriched[,c(1,2,65, 66,67, 68)]
#### remove glycolysis genes ####
remove_genes <- c("SLC",
"HK1","HK2", "HK3", "GK", "GK5", "G6PC", "GPI", "GPI1",
"PFKM", "PFKL", "PFKP", "PFKFB3", "FBP1", "FBP2", "ALDOA",
"ALDOB", "ALDOC", "TPI1", "GAPDH", "GAPDHS", "PGK1", "PGK2",
"PGAM1", "PGAM2", "ENO", "ENO1", "ENO2", "ENO3", "PKM", "PKM2",
"PKLR", "LDHAL6B", "LDHA", "LDHB", "LDHC", "PCK1", "PCK2", "PC", "GK3P")
save2 <- save
for(i in 1:length(remove_genes)){
save2 <- save2[!grep(remove_genes[i], save2$GeneName, ignore.case = TRUE),]
}
reg <- PSOS[(PSOS$GeneName %in% save2$GeneName),]
dim(reg)
## [1] 691 64
#### subset genes that show a trend over time and have a p value less than 0.05.
reg <- reg[reg$`PS6-OS6_logFC` > reg$`PS1-OS1_logFC` & reg$`PS24-OS24_logFC` > reg$`PS6-OS6_logFC` |
reg$`PS6-OS6_logFC` < reg$`PS1-OS1_logFC` & reg$`PS24-OS24_logFC` < reg$`PS6-OS6_logFC`,]
reg <- reg[reg$`PS24-OS24_PValue` < 0.05,]
#### generate plots
mel <- melt(reg[,c(2,3,5,7,9,11,13,15,17,19,21)], id = 1)
NUM <- NULL
for(i in 1:nrow(mel)){
SPL <- strsplit(as.character(mel$variable)[i], split = "_")
SPL <- strsplit(SPL[[1]][1], split = "-")[[1]][1]
SPL <- strsplit(SPL, split = "")[[1]]
SPL <- SPL[3:length(SPL)]
SPL <- paste(SPL, collapse = "")
NUM[i] <- as.numeric(SPL)
}
#NUM
mel$time <- NUM
p <- ggplot(data=mel, aes(x=time, y=value, group=GeneName, colour=GeneName)) +
geom_line() +
theme_pubr()+
geom_point(); p
mel$time <- factor(mel$time)
#brewer.pal(9,"Oranges")
p <- ggplot(data=mel, aes(x=time, y=value, group=GeneName))+
geom_line(size = 1.25, colour = "gray") +
geom_point(shape = 21, size = 2, colour = "black", fill = "gray")+
theme_pubr()+
geom_line(data = subset(mel, GeneName == "GCKR"),
aes(x=`time`, y=value), size = 1.25, colour = "#E31A1C")+
geom_point(data = subset(mel, GeneName == "GCKR"),
aes(x=`time`, y=value), shape = 21, size = 2, colour = "black", fill = "#E31A1C");p
########################
#### PSOS GCKR qPCR ####
########################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "PSOS_GCKR",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("PS", mea$group),]$group, split = "_")); name <- spl[grepl("PS", spl)]
name <- gsub("@PS", "_", name)
PS = mea[grep("PS4|PS8|PS16", mea$group),]$diff
OS = mea[grep("OS4|OS8|OS16", mea$group),]$diff
stimulation <- c(rep("PS", length(PS)), rep("OS", length(PS)))
static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
PS = c(PS, OS),
static = static,
FC = 2^-(c(PS,OS)-static))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation PS static FC means
## 1: GCKR_4 PS 5.693333 6.916667 2.334856 2.055320
## 2: GCKR_4 PS 5.566667 6.653333 2.123828 2.055320
## 3: GCKR_4 PS 3.493333 4.813333 2.496661 2.055320
## 4: GCKR_8 PS 5.253333 6.916667 3.167475 2.405453
## 5: GCKR_8 PS 5.076667 6.653333 2.982799 2.405453
## 6: GCKR_8 PS 3.177333 4.813333 3.108029 2.405453
## 7: GCKR_16 PS 4.400000 6.916667 5.722584 3.458621
## 8: GCKR_16 PS 4.303333 6.653333 5.098243 3.458621
## 9: GCKR_16 PS 2.346667 4.813333 5.527652 3.458621
## 10: GCKR_4 OS 5.920000 6.916667 1.995384 2.055320
## 11: GCKR_4 OS 5.946667 6.653333 1.632029 2.055320
## 12: GCKR_4 OS 4.006667 4.813333 1.749165 2.055320
## 13: GCKR_8 OS 6.063333 6.916667 1.806670 2.405453
## 14: GCKR_8 OS 5.920000 6.653333 1.662476 2.405453
## 15: GCKR_8 OS 4.043333 4.813333 1.705270 2.405453
## 16: GCKR_16 OS 6.476667 6.916667 1.356604 3.458621
## 17: GCKR_16 OS 6.086667 6.653333 1.481098 3.458621
## 18: GCKR_16 OS 4.166667 4.813333 1.565547 3.458621
DT$Gene <- factor(DT$Gene, levels= c("GCKR_4", "GCKR_8", "GCKR_16"),labels = c("GCKR_4", "GCKR_8", "GCKR_16"))
p<- ggplot(DT, aes(x=Gene, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 7))
p
Load the sequence compiled mRNA database
mRNAs <- fread("./results/3 Annotated species mRNA compilation.xls")
setwd(homedir)
mRNAs$external_gene_name <- toupper(mRNAs$external_gene_name)
#### Return mRNAs for DEGs identified
ID2 <- toupper("GCKR")
TRANS_CHR <- mRNAs[(mRNAs$external_gene_name %in% ID2),]
TRANS_CHR[TRANS_CHR$external_gene_name == "GPI1",]$external_gene_name <- "GPI"
TRANS_CHR <- TRANS_CHR[order(TRANS_CHR$external_gene_name),]
#### Remove erroneous chromosome labels CHR_Labels <- c(1:21, "X", "Y")
TRANS_CHR <- TRANS_CHR[(TRANS_CHR$chromosome_name %in% CHR_Labels),]
#### Relabel chromosome designations for use with the getSeq Function
TRANS_CHR <- ChromLabel(TRANS_CHR)
#### Determine genomes to load and query
L <- TRANS_CHR[order(TRANS_CHR$Scientific_Name),]
L[!duplicated(L$Scientific_Name),]$Scientific_Name
#### Load available genomes
ToLoad <- c("Hsapiens.UCSC.hg38","Mmusculus.UCSC.mm10","Rnorvegicus.UCSC.rn6","Btaurus.UCSC.bosTau8",
"Cfamiliaris.UCSC.canFam3", "Dmelanogaster.UCSC.dm6", "Drerio.UCSC.danRer10","Ggallus.UCSC.galGal4",
"Mmulatta.UCSC.rheMac3","Ptroglodytes.UCSC.panTro3","Sscrofa.UCSC.susScr3","Tguttata.UCSC.taeGut2",
"Scerevisiae.UCSC.sacCer3")
GenomeLoader(ToLoad)
#### return the promoter sequences for the identified transcripts from each Species
genome <- c("Hsapiens.UCSC.hg38","Mmusculus.UCSC.mm10","Rnorvegicus.UCSC.rn6"
,"Btaurus.UCSC.bosTau8", "Cfamiliaris.UCSC.canFam3", "Dmelanogaster.UCSC.dm6"
,"Drerio.UCSC.danRer10","Mmulatta.UCSC.rheMac3","Ggallus.UCSC.galGal4"
,"Ptroglodytes.UCSC.panTro3"#,"Scerevisiae.UCSC.sacCer3"
,"Sscrofa.UCSC.susScr3")#,"Tguttata.UCSC.taeGut2")
PROMTrans <- ChromosomeSeqCompileR(DT = TRANS_CHR, Spec = genome, distance = 2000)
#### Remove sequences that contain a large number of N's
setnames(PROMTrans, c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name",
"external_gene_name", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Genome_Sequence"),
c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name", "Common_Name", "gene_symbol",
"transcription_start_site", "transcript_start", "transcript_end", "chromosome_name", "Sequence"))
PROMTrans <- SequenceSiftR(PROMTrans, Percent = 0.50, output = "return_remove")
#### Sort out Splice Variants so that the longest transcript of each protein is represented only once in each species
setnames(PROMTrans, c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name", "Common_Name",
"gene_symbol", "transcription_start_site", "transcript_start", "transcript_end",
"chromosome_name", "Sequence", "Percent_N"),
c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name", "external_gene_name",
"transcription_start_site", "transcript_start", "transcript_end", "chromosome_name",
"Genome_Sequence", "Percent_N"))
PROMTrans <- VariantSort(PROMTrans, variant = "MAX")
PROMTrans[,c(1, 4:10, 12:13), with = FALSE]
## ensembl_transcript_id Scientific_Name Common_Name external_gene_name transcription_start_site transcript_start transcript_end chromosome_name Percent_N Length
## 1: ENST00000264717 Homo_sapiens Human GCKR 27496842 27496842 27523684 chr2 0.00000000 2186
## 2: ENSMUST00000202909 Mus_musculus Mouse GCKR 31297554 31297554 31327295 chr5 0.00000000 4014
## 3: ENSRNOT00000073228 Rattus_norvegicus Rat GCKR 26385761 26355296 26385761 chr6 0.00000000 2273
## 4: ENSBTAT00000044354 Bos_taurus Cattle GCKR 72176344 72154947 72176344 chr11 0.02499375 1515
## 5: ENSDART00000148916 Danio_rerio Zebrafish GCKR 5720936 5706054 5720936 chr17 0.00000000 1211
## 6: ENSMMUT00000007199 Macaca_mulatta Rhesus macaque GCKR 27491384 27491384 27514309 chr13 0.00000000 1878
## 7: ENSSSCT00000023781 Sus_scrofa Wild boar GCKR 118796212 118796212 118823340 chr3 0.00000000 2109
Load and annotate the TX_factor data table
TXDT <- fread("./results/2019-3-22 TX factor data table for R BG.csv", header=TRUE)
TXDT <- TXDT[,.(prot, `MotifMap Degenerate consensus sequence`)]
TXDT$prot <- toupper(TXDT$prot)
TXDT$Consensus_Sequence_Medium <- IUPAC_Boolean(TXDT$`MotifMap Degenerate consensus sequence`, stringency = "medium")
setnames(TXDT, c("prot", "MotifMap Degenerate consensus sequence", "Consensus_Sequence_Medium"),
c("Targeting_Factor", "MotifMap Degenerate consensus sequence", "Consensus_Sequence"))
head(TXDT)
## Targeting_Factor MotifMap Degenerate consensus sequence Consensus_Sequence
## 1: AHR SYYCNRNSTNGCGTGNSW (G|C)(C|T)(C|T)C.(A|G).(G|C)T.GCGTG.(G|C)(A|T)
## 2: AHR BKNGCGTGHNN .(G|T).GCGTG...
## 3: AIRE GVTTATTAAKTGGTTATATTGGKTD G.TTATTAA(G|T)TGGTTATATTGG(G|T)T.
## 4: BMI1 MGYKYCC (A|C)G(C|T)(G|T)(C|T)CC
## 5: BMI1 GGHVDSDS GG...(G|C).(G|C)
## 6: PCBP1 CAGCCAATGAG CAGCCAATGAG
Perform the transcription factor query computations
setnames(PROMTrans, c("ensembl_transcript_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name", "external_gene_name", "transcription_start_site", "transcript_start", "transcript_end", "chromosome_name", "Genome_Sequence", "Percent_N", "Length"),
c("ensembl_transcript_id", "mRNA_Sequence", "Species_File", "Scientific_Name", "Common_Name", "gene_symbol", "transcription_start_site", "transcript_start", "transcript_end", "chromosome_name", "Sequence", "Percent_N", "Length"))
TX_TOT_Species <- TFpredict(Target = PROMTrans, Targeting_Factor_DT = TXDT, type = "multiple_species")
#### Annotate the raw transcription factor hits DT with the IUPAC consensus score
DT25 <- fread("./results/24 Raw Transcription factor hits.xls")
DT25 <- DT25[,c(1:7, 9:12), with = FALSE]
IUPAC <- DT25$`MotifMap Degenerate consensus sequence`
DT25$Score <- IUPAC_ScoreR(IUPAC, stringency = "medium")
#### Retain only transcription factor promoter association conservation between HMR at each promoter
DT25 <- SpeciesTFCons(DT = DT25,Spec = c("Human", "Mouse", "Rat"), provide = "TF_Target")
head(DT25)[,c(1:10, 13), with = FALSE]
## Consensus_Sequence start end Number_Hits Targeting_Factor MotifMap Degenerate consensus sequence length Identified_sequence gene_symbol Species mergecol
## 1: (A|C).AGATAA.(A|G) 3042 3051 1 GATA1 MNAGATAAVR 4001 CacagataaagA GCKR Cattle GATA1-GCKR
## 2: (A|C)ATTA(A|G) 2794 2799 4 PDX1 MATTAR 4001 GaattagA GCKR Cattle PDX1-GCKR
## 3: (A|C)ATTA(A|G) 3096 3101 4 PDX1 MATTAR 4001 AaattagG GCKR Cattle PDX1-GCKR
## 4: (A|C)ATTA(A|G) 3479 3484 4 PDX1 MATTAR 4001 CaattaaA GCKR Cattle PDX1-GCKR
## 5: (A|C)ATTA(A|G) 3668 3673 4 PDX1 MATTAR 4001 GaattaaA GCKR Cattle PDX1-GCKR
## 6: (A|C|T)GATA(A|G) 660 665 9 MED1 (A|C|T)GATA(A|G) 4001 TagatagG GCKR Cattle MED1-GCKR
Determine the number of times each transcription factor consensus sequence is present for each promoter in human
TX_ABUN <- TFRankR(DT = DT25, sortBy = "abundance", dec = TRUE)
TX_ABUN <- TX_ABUN[TX_ABUN$Species == "Human",]
head(TX_ABUN)
## Number_Hits Targeting_Factor gene_symbol Species
## 1: 51 EN1 GCKR Human
## 2: 47 PAX8 GCKR Human
## 3: 40 GATA1 GCKR Human
## 4: 35 E2F1 GCKR Human
## 5: 30 HMGA1 GCKR Human
## 6: 27 SREBF2 GCKR Human
Determine the transcription factors that have the greatest species preservation for each promoter
TX_SPEC <- TFRankR(DT = DT25, sortBy = "species", dec = TRUE)
head(TX_SPEC)
## Targeting_Factor Number_Species gene_symbol
## 1: GATA1 7 GCKR
## 2: PDX1 7 GCKR
## 3: MED1 7 GCKR
## 4: TP53 7 GCKR
## 5: JUN 7 GCKR
## 6: PAX4 7 GCKR
Annotate identified transcription factors with fold changes and generate a graph ranking them by fold changes in the last time point ####
DATA <- fread("./results/5 PSOS FC data.xls")
DATA$GeneName <- toupper(DATA$GeneName)
TXfactor <- TX_SPEC
TXfactor$Targeting_Factor <- toupper(TXfactor$Targeting_Factor)
keys <- TXfactor[!duplicated(TXfactor$Targeting_Factor),]$Targeting_Factor
factors <- DATA[(DATA$GeneName %in% keys),]
factors <- factors[,c(2, 21:22)]
factors <- factors[factors$`PS24-OS24_PValue` < 0.5]
factors <- factors[factors[, .I[which.min(`PS24-OS24_logFC`)], by=GeneName]$V1]
factors <- factors[order(factors$`PS24-OS24_logFC`),]
#### generate graph
group <- NULL
for(i in 1:nrow(factors)){
if(factors$`PS24-OS24_logFC`[i] < 0){
group[i] <- "LESS"
}else{
group[i] <- "MORE"
}
}
factors$group <- group
setnames(factors, c(colnames(factors)), c("GeneName", "FC", "pval", "group"))
factors[order(factors$FC),]
## GeneName FC pval group
## 1: KLF6 -1.6997771 3.729252e-01 LESS
## 2: FOXO4 -1.6904634 3.788613e-01 LESS
## 3: RUNX1 -1.4509353 4.000316e-01 LESS
## 4: E2F1 -0.6932858 3.694364e-01 LESS
## 5: MAFB -0.6503075 3.258786e-01 LESS
## 6: ELF1 -0.5113079 2.335469e-01 LESS
## 7: HMGA1 -0.2658876 3.976512e-01 LESS
## 8: HSF1 0.2611925 2.701231e-01 MORE
## 9: STAT1 0.4013506 1.829854e-01 MORE
## 10: ZBTB7A 0.4170766 4.597126e-01 MORE
## 11: SREBF2 0.5017966 7.430844e-02 MORE
## 12: NFATC1 0.7612140 3.067089e-01 MORE
## 13: HIC1 1.9413522 3.288013e-01 MORE
## 14: KLF2 2.3178029 7.030000e-17 MORE
## 15: KLF4 2.8066037 1.400000e-09 MORE
p <- ggbarplot(factors, x = "GeneName", y = "FC",
fill = "group", # change fill color by cyl
color = "white", # Set bar border colors to white
palette = "npg", # jco journal color palett. see ?ggpar
sort.val = "asc", # Sort the value in dscending order
sort.by.groups = TRUE, # Sort inside each group
x.text.angle = 90 # Rotate vertically x axis texts
);p
###################################
#### AD-KLF4 ChiP binding qPCR ####
###################################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "AD_KLF4_binding",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("KLF4", mea$group),]$group, split = "_")); name <- spl[grepl("KLF4", spl)]
name <- gsub("@KLF4", "_", name)
KLF4 = mea[grep("KLF4", mea$group),]$diff
NUL = mea[grep("NULL", mea$group),]$diff
stimulation <- c(rep("KLF4", length(KLF4)))#, rep("NULL", length(NUL)))
# static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
KLF4 = KLF4,
NUL = NUL,
FC = 2^-(c(KLF4)-NUL))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation KLF4 NUL FC means
## 1: KLF4 KLF4 7.60000 8.95000 2.549121 2.505314
## 2: KLF4 KLF4 8.56000 10.12667 2.962195 2.505314
## 3: KLF4 KLF4 10.84333 11.84667 2.004626 2.505314
DT2 = data.frame(Gene = rep(DT$Gene[1], 6),
stimulation = c(DT$stimulation, rep("NULL", 3)),
FC = c(DT$FC, rep(1, 3)))
DT2
## Gene stimulation FC
## 1 KLF4 KLF4 2.549121
## 2 KLF4 KLF4 2.962195
## 3 KLF4 KLF4 2.004626
## 4 KLF4 NULL 1.000000
## 5 KLF4 NULL 1.000000
## 6 KLF4 NULL 1.000000
p<- ggplot(DT2, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3.5))
p
######################
#### AD-KLF4 qPCR ####
######################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "AD_KLF4",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("KLF4", mea$group),]$group, split = "_")); name <- spl[grepl("KLF4", spl)]
name <- gsub("@KLF4", "_", name)
KLF4 = mea[grep("KLF4", mea$group),]$diff
NUL = mea[grep("NULL", mea$group),]$diff
stimulation <- c(rep("KLF4", length(KLF4)))#, rep("NULL", length(NUL)))
# static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
KLF4 = KLF4,
NUL = NUL,
FC = 2^-(c(KLF4)-NUL))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation KLF4 NUL FC means
## 1: KLF4 KLF4 6.410000 7.89000 2.789487 3.062616
## 2: KLF4 KLF4 10.640000 12.40333 3.394816 3.062616
## 3: KLF4 KLF4 7.933333 9.52000 3.003546 3.062616
DT2 = data.table(Gene = rep(DT$Gene[1], 6),
stimulation = c(DT$stimulation, rep("NULL", 3)),
FC = c(DT$FC, rep(1, 3)))
DT2
## Gene stimulation FC
## 1: KLF4 KLF4 2.789487
## 2: KLF4 KLF4 3.394816
## 3: KLF4 KLF4 3.003546
## 4: KLF4 NULL 1.000000
## 5: KLF4 NULL 1.000000
## 6: KLF4 NULL 1.000000
DT2$stimulation <- factor(DT2$stimulation, levels= c("NULL", "KLF4"),labels = c("NULL", "KLF4"))
p<- ggplot(DT2, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 4))
p
###########################################################
#### Adenovirus KLF4 overexpression GCKR western blot #####
###########################################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "KLF4_AAV",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: KLF4_AAV GCKR null 180.082 219.932 39.850
## 2: KLF4_AAV GCKR KLF4 160.647 219.932 59.285
## 3: KLF4_AAV GCKR null 176.831 219.932 43.101
## 4: KLF4_AAV GCKR KLF4 173.355 219.932 46.577
## 5: KLF4_AAV GCKR null 207.313 219.932 12.619
## 6: KLF4_AAV GCKR KLF4 203.434 219.932 16.498
## 7: KLF4_AAV Actin null 79.670 219.932 140.262
## 8: KLF4_AAV Actin KLF4 66.859 219.932 153.073
## 9: KLF4_AAV Actin null 77.503 219.932 142.429
## 10: KLF4_AAV Actin KLF4 88.512 219.932 131.420
## 11: KLF4_AAV Actin null 95.482 219.932 124.450
## 12: KLF4_AAV Actin KLF4 99.013 219.932 120.919
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
GCKR = DATA[DATA$blot == "GCKR",]$difference,
Actin = DATA[DATA$blot == "Actin",]$difference)
dt$change <- dt$GCKR/dt$Actin
dt <- rbind(dt[dt$treatment == "KLF4"], dt[dt$treatment == "null"])
dt$FC <- c((dt[dt$treatment == 'KLF4']$change/dt[dt$treatment == 'null']$change), rep(1, 3))
dt
## treatment GCKR Actin change FC
## 1: KLF4 59.285 153.073 0.3872989 1.363195
## 2: KLF4 46.577 131.420 0.3544133 1.171173
## 3: KLF4 16.498 120.919 0.1364384 1.345571
## 4: null 39.850 140.262 0.2841112 1.000000
## 5: null 43.101 142.429 0.3026139 1.000000
## 6: null 12.619 124.450 0.1013982 1.000000
dt$treatment <- factor(dt$treatment, levels= c("null", "KLF4"),labels = c("null", "KLF4"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 1.75))
p
#######################################
#### KLF4 siRNA GCKR western blot #####
#######################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "KLF4_si",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: KLF4_si GCKR SCR 160.513 234.525 74.012
## 2: KLF4_si GCKR KLF4 190.897 234.525 43.628
## 3: KLF4_si Actin SCR 132.910 234.525 101.615
## 4: KLF4_si Actin KLF4 146.190 234.525 88.335
## 5: KLF4_si GCKR KLF4 61.162 107.772 46.610
## 6: KLF4_si GCKR SCR 39.393 107.772 68.379
## 7: KLF4_si GCKR KLF4 52.854 107.772 54.918
## 8: KLF4_si GCKR SCR 42.682 107.772 65.090
## 9: KLF4_si Actin KLF4 50.795 107.772 56.977
## 10: KLF4_si Actin SCR 58.667 107.772 49.105
## 11: KLF4_si Actin KLF4 55.088 107.772 52.684
## 12: KLF4_si Actin SCR 60.505 107.772 47.267
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
GCKR = DATA[DATA$blot == "GCKR",]$difference,
Actin = DATA[DATA$blot == "Actin",]$difference)
dt$change <- dt$GCKR/dt$Actin
dt <- rbind(dt[dt$treatment == "KLF4"], dt[dt$treatment == "SCR"])
dt$FC <- c(-(dt[dt$treatment == 'SCR']$change/dt[dt$treatment == 'KLF4']$change), rep(1, 3))
dt
## treatment GCKR Actin change FC
## 1: KLF4 43.628 88.335 0.4938926 -1.474728
## 2: KLF4 46.610 56.977 0.8180494 -1.702227
## 3: KLF4 54.918 52.684 1.0424038 -1.321053
## 4: SCR 74.012 101.615 0.7283570 1.000000
## 5: SCR 68.379 49.105 1.3925059 1.000000
## 6: SCR 65.090 47.267 1.3770707 1.000000
dt$treatment <- factor(dt$treatment, levels= c("SCR", "KLF4"),labels = c("SCR", "KLF4"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(-2, 1.75))
p
#####################################
#### PSOS KLF4 binding GCKR qPCR ####
#####################################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "PSOS_KLF4_binding",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("PS", mea$group),]$group, split = "_")); name <- spl[grepl("PS", spl)]
name <- gsub("@PS", "_", name)
PS = mea[grep("PS", mea$group),]$diff
OS = mea[grep("OS", mea$group),]$diff
stimulation <- c(rep("PS", length(PS)), rep("OS", length(PS)))
static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
PS = c(PS, OS),
static = static,
FC = 2^-(c(PS,OS)-static))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation PS static FC means
## 1: GCKR_ PS 7.666667 8.646667 1.972465 1.640692
## 2: GCKR_ PS 6.783333 7.663333 1.840375 1.640692
## 3: GCKR_ PS 7.530000 8.573333 2.060984 1.640692
## 4: GCKR_ OS 8.536667 8.646667 1.079228 1.640692
## 5: GCKR_ OS 7.326667 7.663333 1.262835 1.640692
## 6: GCKR_ OS 7.870000 8.573333 1.628263 1.640692
# DT$Gene <- factor(DT$Gene, levels= c("GCKR_4", "GCKR_8", "GCKR_16"),labels = c("GCKR_4", "GCKR_8", "GCKR_16"))
p<- ggplot(DT, aes(x=stimulation, y=FC, fill=stimulation)) + #
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black")+ #position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test") # Add pairwise comparisons p-value
# ylim(c(0, 4))
p
########################
#### PSOS GCKR qPCR ####
########################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "PSOS_KLF4",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("PS", mea$group),]$group, split = "_")); name <- spl[grepl("PS", spl)]
name <- gsub("@PS", "_", name)
PS = mea[grep("PS", mea$group),]$diff
OS = mea[grep("OS", mea$group),]$diff
stimulation <- c(rep("PS", length(PS)), rep("OS", length(PS)))
static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
PS = c(PS, OS),
static = static,
FC = 2^-(c(PS,OS)-static))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation PS static FC means
## 1: GCKR_ PS 5.776667 7.350000 2.9759150 2.066798
## 2: GCKR_ PS 6.050000 7.363333 2.4851507 2.066798
## 3: GCKR_ PS 8.526667 10.436667 3.7580910 2.066798
## 4: GCKR_ OS 7.160000 7.350000 1.1407637 2.066798
## 5: GCKR_ OS 7.210000 7.363333 1.1121361 2.066798
## 6: GCKR_ OS 10.543333 10.436667 0.9287314 2.066798
# DT$Gene <- factor(DT$Gene, levels= c("GCKR_4", "GCKR_8", "GCKR_16"),labels = c("GCKR_4", "GCKR_8", "GCKR_16"))
p<- ggplot(DT, aes(x=stimulation, y=FC, fill=stimulation)) + #
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black")+ #position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test") # Add pairwise comparisons p-value
# ylim(c(0, 4))
p
#################################
#### PSOS GCKR western blot #####
#################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "PSOS",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: PSOS GCKR PS 179.797 217.946 38.149
## 2: PSOS GCKR OS 187.001 217.946 30.945
## 3: PSOS GCKR PS 162.661 217.946 55.285
## 4: PSOS GCKR OS 181.242 217.946 36.704
## 5: PSOS Actin PS 164.061 217.946 53.885
## 6: PSOS Actin OS 155.639 217.946 62.307
## 7: PSOS Actin PS 163.941 217.946 54.005
## 8: PSOS Actin OS 170.379 217.946 47.567
## 9: PSOS GCKR PS 43.530 107.449 63.919
## 10: PSOS GCKR OS 59.795 107.449 47.654
## 11: PSOS Actin PS 43.070 107.449 64.379
## 12: PSOS Actin OS 39.633 107.449 67.816
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
GCKR = DATA[DATA$blot == "GCKR",]$difference,
Actin = DATA[DATA$blot == "Actin",]$difference)
dt$change <- dt$GCKR/dt$Actin
dt
## treatment GCKR Actin change
## 1: PS 38.149 53.885 0.7079707
## 2: OS 30.945 62.307 0.4966537
## 3: PS 55.285 54.005 1.0237015
## 4: OS 36.704 47.567 0.7716274
## 5: PS 63.919 64.379 0.9928548
## 6: OS 47.654 67.816 0.7026955
dt <- rbind(dt[dt$treatment == "PS"], dt[dt$treatment == "OS"])
dt$FC <- c((dt[dt$treatment == 'PS']$change/dt[dt$treatment == 'OS']$change), rep(1, 3))
dt
## treatment GCKR Actin change FC
## 1: PS 38.149 53.885 0.7079707 1.425482
## 2: PS 55.285 54.005 1.0237015 1.326679
## 3: PS 63.919 64.379 0.9928548 1.412923
## 4: OS 30.945 62.307 0.4966537 1.000000
## 5: OS 36.704 47.567 0.7716274 1.000000
## 6: OS 47.654 67.816 0.7026955 1.000000
dt$treatment <- factor(dt$treatment, levels= c("PS", "OS"),labels = c("PS", "OS"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 1.75))
p
###################################
#### PSOS KLF4 siRNA GCKR qPCR ####
###################################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "PSOS_KLF4_si",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("PS", mea$group),]$group, split = "_")); name <- spl[grepl("PS", spl)]
name <- gsub("@PS", "", name)
PS_SCR = mea[grep("PS_SCR", mea$group),]$diff
OS_SCR = mea[grep("OS_SCR", mea$group),]$diff
PS_KLF4 = mea[grep("PS_KLF4", mea$group),]$diff
OS_KLF4 = mea[grep("OS_KLF4", mea$group),]$diff
static_scr = mea[grep("static_SCR", mea$group),]$diff
static_KLF4 = mea[grep("static_KLF4", mea$group),]$diff
static <- c(static_scr, static_scr, static_KLF4, static_KLF4)
stimulation <- c(rep("PS_SCR", length(PS_SCR)), rep("OS_SCR", length(OS_SCR)),
rep("PS_KLF4", length(PS_KLF4)), rep("OS_KLF4", length(OS_KLF4)))
DT <- data.table(Gene = name,
stimulation = stimulation,
treated = c(PS_SCR, OS_SCR, PS_KLF4, OS_KLF4),
static = static,
FC = 2^-(c(PS_SCR, OS_SCR, PS_KLF4, OS_KLF4)-static))
# Take means
DT[, means := mean(FC), by = stimulation]
DT
## Gene stimulation treated static FC means
## 1: GCKR PS_SCR 7.023333 8.460000 2.7069470 3.0145295
## 2: GCKR PS_SCR 5.910000 7.606667 3.2415114 3.0145295
## 3: GCKR PS_SCR 7.226667 8.856667 3.0951300 3.0145295
## 4: GCKR OS_SCR 8.513333 8.460000 0.9637071 1.0336257
## 5: GCKR OS_SCR 7.556667 7.606667 1.0352649 1.0336257
## 6: GCKR OS_SCR 8.716667 8.856667 1.1019051 1.0336257
## 7: GCKR PS_KLF4 6.403333 6.263333 0.9075192 0.8614780
## 8: GCKR PS_KLF4 11.260000 11.053333 0.8665370 0.8614780
## 9: GCKR PS_KLF4 5.963333 5.660000 0.8103779 0.8614780
## 10: GCKR OS_KLF4 6.956667 6.263333 0.6184233 0.7250623
## 11: GCKR OS_KLF4 11.390000 11.053333 0.7918688 0.7250623
## 12: GCKR OS_KLF4 6.046667 5.660000 0.7648948 0.7250623
DT$stmulation <- factor(DT$stimulation, levels= c("PS_KLF4", "PS_SCR", "OS_KLF4", "OS_SCR"),labels = c("PS_KLF4", "PS_SCR", "OS_KLF4", "OS_SCR"))
my_comparisons <- list( c("OS_KLF4", "OS_SCR"), c("PS_KLF4", "PS_SCR"), c("PS_SCR", "OS_SCR"), c("PS_KLF4", "OS_KLF4"), c("PS_SCR", "OS_KLF4"))
p<- ggplot(DT, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 5))
p
#########################
#### TA/AA GCKR qPCR ####
#########################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "TA_AA",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("TA", mea$group),]$group, split = "_")); name <- spl[grepl("TA", spl)]
name <- gsub("@TA", "", name)
TA = mea[grep("TA", mea$group),]$diff
AA = mea[grep("AA", mea$group),]$diff
stimulation <- c(rep("TA", length(TA)))#, rep("NULL", length(NUL)))
# static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
TA = TA,
AA = AA,
FC = 2^-(c(TA)-AA))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation TA AA FC means
## 1: GCKR TA 7.446667 8.683333 2.356534 2.310052
## 2: GCKR TA 6.766667 7.956667 2.281527 2.310052
## 3: GCKR TA 7.583333 8.780000 2.292095 2.310052
DT2 = data.table(Gene = rep(DT$Gene[1], 6),
stimulation = c(DT$stimulation, rep("AA", 3)),
FC = c(DT$FC, rep(1, 3)))
DT2
## Gene stimulation FC
## 1: GCKR TA 2.356534
## 2: GCKR TA 2.281527
## 3: GCKR TA 2.292095
## 4: GCKR AA 1.000000
## 5: GCKR AA 1.000000
## 6: GCKR AA 1.000000
DT2$stimulation <- factor(DT2$stimulation, levels= c("AA", "TA"),labels = c("AA", "TA"))
p<- ggplot(DT2, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
#### Identify and rank kinases that can phosphorylate GCKR
PROTan <- fread("./results/3 Annotated species protein compilation.xls")
PROTan$external_gene_name <- toupper(PROTan$external_gene_name)
#### Return Proteins for DEGs identified
ID2 <- toupper("GCKR")
TRANS_PROT <- PROTan[(PROTan$external_gene_name %in% ID2),]
TRANS_PROT <- TRANS_PROT[order(TRANS_PROT$external_gene_name),]
TRANS_PROT[,c(1,3:6), with = FALSE]
#### Sort out Splice Variants so that the longest transcript of each protein is represented only once in each species
Sorted_Proteins <- VariantSort(TRANS_PROT, variant = "MAX")
Sorted_Proteins[,c(1, 4:7), with = FALSE]
## ensembl_peptide_id Scientific_Name Common_Name external_gene_name Length
## 1: ENSAMEP00000001336 Ailuropoda_melanoleuca Giant panda GCKR 625
## 2: ENSAMXP00000000871 Astyanax_mexicanus Cave fish GCKR 446
## 3: ENSBTAP00000041859 Bos_taurus Cattle GCKR 505
## 4: ENSCJAP00000001541 Callithrix_jacchus Common marmoset monkey GCKR 568
## 5: ENSCPOP00000007381 Cavia_porcellus Guinea pig GCKR 625
## 6: ENSCSAP00000006296 Chlorocebus_sabaeus Green monkey GCKR 625
## 7: ENSDARP00000124274 Danio_rerio Zebrafish GCKR 281
## 8: ENSDORP00000004770 Dipodomys_ordii Ord's kangaroo rat GCKR 624
## 9: ENSETEP00000001256 Echinops_telfairi Lesser hedgehog tenrec GCKR 487
## 10: ENSECAP00000018966 Equus_caballus Horse GCKR 625
## 11: ENSEEUP00000006890 Erinaceus_europaeus European hedgehog GCKR 514
## 12: ENSFCAP00000018283 Felis_catus Cat GCKR 155
## 13: ENSGACP00000019720 Gasterosteus_aculeatus Three-spined stickleback GCKR 583
## 14: ENSGGOP00000012482 Gorilla_gorilla Gorilla GCKR 625
## 15: ENSP00000264717 Homo_sapiens Human GCKR 625
## 16: ENSSTOP00000004530 Ictidomys_tridecemlineatus Thirteen-lined ground squirrel GCKR 576
## 17: ENSLACP00000006889 Latimeria_chalumnae West indian ocean coelacanth GCKR 346
## 18: ENSLOCP00000020702 Lepisosteus_oculatus Spotted gar GCKR 626
## 19: ENSLAFP00000005668 Loxodonta_africana African bush elephant GCKR 621
## 20: ENSMMUP00000006765 Macaca_mulatta Rhesus macaque GCKR 625
## 21: ENSMEUP00000006501 Macropus_eugenii Tammar wallaby GCKR 466
## 22: ENSMICP00000014537 Microcebus_murinus Gray mouse lemur GCKR 474
## 23: ENSMODP00000019483 Monodelphis_domestica Gray short-tailed opossum GCKR 625
## 24: ENSMUSP00000144202 Mus_musculus Mouse GCKR 623
## 25: ENSNLEP00000004669 Nomascus_leucogenys Northern white-cheeked gibbon GCKR 625
## 26: ENSOPRP00000011814 Ochotona_princeps American pika GCKR 623
## 27: ENSONIP00000015155 Oreochromis_niloticus Nile tilapia GCKR 585
## 28: ENSOCUP00000011833 Oryctolagus_cuniculus European rabbit GCKR 622
## 29: ENSORLP00000005539 Oryzias_latipes Japanese rice fish GCKR 582
## 30: ENSOGAP00000008281 Otolemur_garnettii Northern greater galago GCKR 524
## 31: ENSOARP00000020812 Ovis_aries Sheep GCKR 512
## 32: ENSPTRP00000020223 Pan_troglodytes Chimpanzee GCKR 625
## 33: ENSPANP00000009029 Papio_anubis Olive baboon GCKR 625
## 34: ENSPMAP00000010620 Petromyzon_marinus Sea lamprey GCKR 588
## 35: ENSPFOP00000030735 Poecilia_formosa Amazon molly GCKR 600
## 36: ENSPPYP00000014009 Pongo_abelii Sumatran orangutan GCKR 625
## 37: ENSPCAP00000015899 Procavia_capensis Rock badger GCKR 583
## 38: ENSPVAP00000014342 Pteropus_vampyrus Large flying fox GCKR 334
## 39: ENSRNOP00000064260 Rattus_norvegicus Rat GCKR 627
## 40: ENSSHAP00000016711 Sarcophilus_harrisii Tasmanian devil GCKR 625
## 41: ENSSSCP00000020241 Sus_scrofa Wild boar GCKR 625
## 42: ENSTRUP00000032438 Takifugu_rubripes Japanese puffer GCKR 595
## 43: ENSTNIP00000017284 Tetraodon_nigroviridis Green spotted puffer GCKR 566
## 44: ENSTBEP00000003453 Tupaia_belangeri Northern treeshrew GCKR 529
## 45: ENSTTRP00000011887 Tursiops_truncatus Bottlenose dolphin GCKR 610
## 46: ENSVPAP00000001840 Vicugna_pacos Alpaca GCKR 349
## 47: ENSXETP00000060990 Xenopus_tropicalis Western clawed frog GCKR 111
## 48: ENSXMAP00000007838 Xiphophorus_maculatus Southern platyfish GCKR 579
## ensembl_peptide_id Scientific_Name Common_Name external_gene_name Length
Load the Post translational enzyme data table
EZDT <- fread("./results/2019-8-30 Post translational enzyme data table for R BG.csv", header = TRUE)
EZDT <- EZDT[,.(Targeting_Factor, Consensus_Sequence)]
head(EZDT)
## Targeting_Factor Consensus_Sequence
## 1: AMPK (K|R|H)(F|L|M|I|V)(K|R|H)...(S|T)...(F|L|M|I|V)
## 2: AMPK .(F|L|M|I|V)(K|R|H)...(S|T)...(F|L|M|I|V)
## 3: AMPK (K|R|H)(F|L|M|I|V)....(S|T)...(F|L|M|I|V)
## 4: AMPK .(F|L|M|I|V)....(S|T)...(F|L|M|I|V)
## 5: AKT R.R(S|T|A)(S|T|A)(S|T)(F|L)
## 6: AKT R.R..(S|T)
Perform the transcription factor query computations
setnames(Sorted_Proteins, colnames(Sorted_Proteins),
c("ensembl_peptide_id", "Sequence", "Species_File", "Scientific_Name", "Common_Name", "gene_symbol", "Length"))
PROT_TOT_Species <- TFpredict(Sorted_Proteins, EZDT, type = "multiple_species")
PROT_TOT_Species[,c(1:6,8:11), with = FALSE]
## Consensus_Sequence start end Number_Hits Targeting_Factor length Identified_sequence gene_symbol Species Scientific_Name
## 1: (G|A|V|L|M|I)K.E 17 20 1 UBE2I 625 PgkweL GCKR Giant panda Ailuropoda_melanoleuca
## 2: (K|R|H)(F|L|M|I|V)....(S|T)...(F|L|M|I|V) 475 485 1 AMPK 625 QkfqhelstkwvL GCKR Giant panda Ailuropoda_melanoleuca
## 3: (M|I|L|V).(K|R)..(S|T) 316 321 2 CHKI 625 AlmkqasT GCKR Giant panda Ailuropoda_melanoleuca
## 4: (M|I|L|V).(K|R)..(S|T) 449 454 2 CHKI 625 PlkklftS GCKR Giant panda Ailuropoda_melanoleuca
## 5: .(F|L|M|I|V)....(S|T)...(F|L|M|I|V) 103 113 7 AMPK 625 VvlsgggssgrmA GCKR Giant panda Ailuropoda_melanoleuca
## 6: .(F|L|M|I|V)....(S|T)...(F|L|M|I|V) 114 124 7 AMPK 625 MaflmsvsfnqlM GCKR Giant panda Ailuropoda_melanoleuca
Determine the number of times each Protein consensus sequence is present for each target in human
TF_ABUN <- TFRankR(DT = PROT_TOT_Species, sortBy = "abundance", dec = TRUE)
TF_ABUN <- TF_ABUN[TF_ABUN$Species == "Human",]; TF_ABUN
## Number_Hits Targeting_Factor gene_symbol Species
## 1: 7 AMPK GCKR Human
## 2: 6 MSK1 GCKR Human
## 3: 6 MSK2 GCKR Human
## 4: 1 CaMKIV GCKR Human
## 5: 1 UBE2I GCKR Human
## 6: 1 AMPK GCKR Human
## 7: 1 CHKI GCKR Human
## 8: 1 N-Glycosylation GCKR Human
Determine the transcription factors that have the greatest species preservation for each promoter
TF_SPEC <- TFRankR(DT = PROT_TOT_Species, sortBy = "species", dec = TRUE); TF_SPEC
## Targeting_Factor Number_Species gene_symbol
## 1: AMPK 46 GCKR
## 2: MSK1 44 GCKR
## 3: MSK2 44 GCKR
## 4: N-Glycosylation 40 GCKR
## 5: CHKI 38 GCKR
## 6: CaMKIV 29 GCKR
## 7: UBE2I 23 GCKR
## 8: PAK 6 GCKR
## 9: RSK1 3 GCKR
## 10: CaMKI 3 GCKR
## 11: AKT 2 GCKR
g1 <- ggplot(TF_SPEC,
aes(y=TF_SPEC$Number_Species, x=reorder(TF_SPEC$Targeting_Factor, TF_SPEC$Number_Species), ymin=0, ymax=TF_SPEC$Number_Species), col="blue") +
geom_point(size=1, color="#E7B800") +
geom_linerange(size=.5, color="#00AFBB") +
theme(plot.title = element_text(size = 10,colour="black")) +
coord_flip() +
theme_pubr()+
geom_text(aes(label = paste(TF_SPEC$Number_Species,sep=":::"), y = TF_SPEC$Number_Species + .05*max(TF_SPEC$Number_Species)), size = 4) +
ggtitle("Species Conservation of\n Consensus Sequences\n") +
ylab("Number of sites") +
xlab("Enzyme\n");g1
#########################################
#### GCKR kinase assay western blot #####
#########################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "GCKR_kinase",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: GCKR_kinase P-ser S481 132.662 144.267 11.605
## 2: GCKR_kinase P-ser S481 120.477 144.267 23.790
## 3: GCKR_kinase P-ser S481A 136.326 144.267 7.941
## 4: GCKR_kinase P-ser S481A 136.786 144.267 7.481
## 5: GCKR_kinase GCKR S481 104.339 144.267 39.928
## 6: GCKR_kinase GCKR S481 111.658 144.267 32.609
## 7: GCKR_kinase GCKR S481A 110.628 144.267 33.639
## 8: GCKR_kinase GCKR S481A 106.036 144.267 38.231
## 9: GCKR_kinase P-ser S481 90.328 104.050 13.722
## 10: GCKR_kinase P-ser S481 70.921 104.050 33.129
## 11: GCKR_kinase P-ser S481A 100.213 104.050 3.837
## 12: GCKR_kinase P-ser S481A 99.609 104.050 4.441
## 13: GCKR_kinase GCKR S481 42.496 104.050 61.554
## 14: GCKR_kinase GCKR S481 41.865 104.050 62.185
## 15: GCKR_kinase GCKR S481A 45.393 104.050 58.657
## 16: GCKR_kinase GCKR S481A 49.207 104.050 54.843
## 17: GCKR_kinase P-ser S481 129.408 145.277 15.869
## 18: GCKR_kinase P-ser S481 103.085 145.277 42.192
## 19: GCKR_kinase P-ser S481A 140.564 145.277 4.713
## 20: GCKR_kinase P-ser S481A 141.256 145.277 4.021
## 21: GCKR_kinase GCKR S481 76.320 145.277 68.957
## 22: GCKR_kinase GCKR S481 89.152 145.277 56.125
## 23: GCKR_kinase GCKR S481A 87.371 145.277 57.906
## 24: GCKR_kinase GCKR S481A 75.094 145.277 70.183
## experiment blot treatment densitometry background difference
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "P-ser",]$treatment,
Pser = DATA[DATA$blot == "P-ser",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$Pser/dt$GCKR
dt$treatment <- paste(rep(c("CTRL", "AMPK"), 6), dt$treatment, sep = "_")
dt <- rbind(dt[dt$treatment == "CTRL_S481"], dt[dt$treatment == "AMPK_S481"], dt[dt$treatment == "CTRL_S481A"], dt[dt$treatment == "AMPK_S481A"])
dt$FC <- c(rep(1, 3),
(dt[dt$treatment == "AMPK_S481"]$change/dt[dt$treatment == "CTRL_S481"]$change),
(dt[dt$treatment == "CTRL_S481A"]$change/dt[dt$treatment == "CTRL_S481"]$change),
(dt[dt$treatment == "AMPK_S481A"]$change/dt[dt$treatment == "CTRL_S481"]$change))
dt
## treatment Pser GCKR change FC
## 1: CTRL_S481 11.605 39.928 0.29064817 1.0000000
## 2: CTRL_S481 13.722 61.554 0.22292621 1.0000000
## 3: CTRL_S481 15.869 68.957 0.23012892 1.0000000
## 4: AMPK_S481 23.790 32.609 0.72955319 2.5100905
## 5: AMPK_S481 33.129 62.185 0.53274906 2.3898000
## 6: AMPK_S481 42.192 56.125 0.75175056 3.2666496
## 7: CTRL_S481A 7.941 33.639 0.23606528 0.8122029
## 8: CTRL_S481A 3.837 58.657 0.06541419 0.2934343
## 9: CTRL_S481A 4.713 57.906 0.08139053 0.3536736
## 10: AMPK_S481A 7.481 38.231 0.19567890 0.6732501
## 11: AMPK_S481A 4.441 54.843 0.08097661 0.3632440
## 12: AMPK_S481A 4.021 70.183 0.05729308 0.2489608
dt$treatment <- factor(dt$treatment, levels= c("CTRL_S481", "AMPK_S481", "CTRL_S481A", "AMPK_S481A"),
labels = c("CTRL_S481", "AMPK_S481", "CTRL_S481A", "AMPK_S481A"))
my_comparisons <- list( c("AMPK_S481", "CTRL_S481"), c("AMPK_S481", "AMPK_S481A"),
c("AMPK_S481", "CTRL_S481A"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 4.5))
p
####################################
#### Shear P-GCKR western blot #####
####################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "shear_pGCKR",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: shear_pGCKR P-GCKR PS10 90.218 102.234 12.016
## 2: shear_pGCKR P-GCKR PS60 96.796 102.234 5.438
## 3: shear_pGCKR P-GCKR OS10 95.134 102.234 7.100
## 4: shear_pGCKR P-GCKR OS60 99.644 102.234 2.590
## 5: shear_pGCKR P-GCKR PS10_AMPK 102.148 102.234 0.086
## 6: shear_pGCKR P-GCKR PS60_AMPK 102.138 102.234 0.096
## 7: shear_pGCKR GCKR PS10 53.410 102.234 48.824
## 8: shear_pGCKR GCKR PS60 42.238 102.234 59.996
## 9: shear_pGCKR GCKR OS10 31.416 102.234 70.818
## 10: shear_pGCKR GCKR OS60 37.263 102.234 64.971
## 11: shear_pGCKR GCKR PS10_AMPK 43.490 102.234 58.744
## 12: shear_pGCKR GCKR PS60_AMPK 46.127 102.234 56.107
## 13: shear_pGCKR P-GCKR PS10 58.900 108.013 49.113
## 14: shear_pGCKR P-GCKR PS60 61.889 108.013 46.124
## 15: shear_pGCKR P-GCKR OS10 67.699 108.013 40.314
## 16: shear_pGCKR P-GCKR OS60 80.670 108.013 27.343
## 17: shear_pGCKR P-GCKR PS10_AMPK 107.449 108.013 0.564
## 18: shear_pGCKR P-GCKR PS60_AMPK 107.125 108.013 0.888
## 19: shear_pGCKR GCKR PS10 49.015 108.013 58.998
## 20: shear_pGCKR GCKR PS60 45.959 108.013 62.054
## 21: shear_pGCKR GCKR OS10 44.227 108.013 63.786
## 22: shear_pGCKR GCKR OS60 38.754 108.013 69.259
## 23: shear_pGCKR GCKR PS10_AMPK 42.609 108.013 65.404
## 24: shear_pGCKR GCKR PS60_AMPK 34.915 108.013 73.098
## 25: shear_pGCKR P-GCKR PS10 66.910 107.970 41.060
## 26: shear_pGCKR P-GCKR PS60 66.526 107.970 41.444
## 27: shear_pGCKR P-GCKR OS10 75.686 107.970 32.284
## 28: shear_pGCKR P-GCKR OS60 85.784 107.970 22.186
## 29: shear_pGCKR P-GCKR PS10_AMPK 106.320 107.970 1.650
## 30: shear_pGCKR P-GCKR PS60_AMPK 107.560 107.970 0.410
## 31: shear_pGCKR GCKR PS10 65.852 107.970 42.118
## 32: shear_pGCKR GCKR PS60 62.894 107.970 45.076
## 33: shear_pGCKR GCKR OS10 68.929 107.970 39.041
## 34: shear_pGCKR GCKR OS60 65.864 107.970 42.106
## 35: shear_pGCKR GCKR PS10_AMPK 69.820 107.970 38.150
## 36: shear_pGCKR GCKR PS60_AMPK 78.470 107.970 29.500
## experiment blot treatment densitometry background difference
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "P-GCKR",]$treatment,
Pser = DATA[DATA$blot == "P-GCKR",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$Pser/dt$GCKR
dt <- rbind(dt[dt$treatment == "PS10"], dt[dt$treatment == "PS60"],
dt[dt$treatment == "OS10"], dt[dt$treatment == "OS60"],
dt[dt$treatment == "PS10_AMPK"], dt[dt$treatment == "PS60_AMPK"])
dt$FC <- c(
(dt[dt$treatment == "PS10"]$change/dt[dt$treatment == "OS10"]$change),
(dt[dt$treatment == "PS60"]$change/dt[dt$treatment == "OS60"]$change),
rep(1, 3),
rep(1, 3),
(dt[dt$treatment == "PS10_AMPK"]$change/dt[dt$treatment == "OS10"]$change),
(dt[dt$treatment == "PS60_AMPK"]$change/dt[dt$treatment == "OS60"]$change))
dt
## treatment Pser GCKR change FC
## 1: PS10 12.016 48.824 0.246108471 2.45477602
## 2: PS10 49.113 58.998 0.832451948 1.31713003
## 3: PS10 41.060 42.118 0.974880099 1.17892126
## 4: PS60 5.438 59.996 0.090639376 2.27371849
## 5: PS60 46.124 62.054 0.743288104 1.88272650
## 6: PS60 41.444 45.076 0.919424971 1.74494311
## 7: OS10 7.100 70.818 0.100256997 1.00000000
## 8: OS10 40.314 63.786 0.632019565 1.00000000
## 9: OS10 32.284 39.041 0.826925540 1.00000000
## 10: OS60 2.590 64.971 0.039863939 1.00000000
## 11: OS60 27.343 69.259 0.394793456 1.00000000
## 12: OS60 22.186 42.106 0.526908279 1.00000000
## 13: PS10_AMPK 0.086 58.744 0.001463979 0.01460227
## 14: PS10_AMPK 0.564 65.404 0.008623326 0.01364408
## 15: PS10_AMPK 1.650 38.150 0.043250328 0.05230257
## 16: PS60_AMPK 0.096 56.107 0.001711016 0.04292141
## 17: PS60_AMPK 0.888 73.098 0.012148075 0.03077071
## 18: PS60_AMPK 0.410 29.500 0.013898305 0.02637709
dt$treatment <- factor(dt$treatment, levels= c("PS10", "PS60", "OS10", "OS60", "PS10_AMPK", "PS60_AMPK"),
labels = c("PS10", "PS60", "OS10", "OS60", "PS10_AMPK", "PS60_AMPK"))
my_comparisons <- list( c("PS10", "OS10"), c("PS60", "OS60"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 4))
p
dt$treatment <- gsub("10|60", "", dt$treatment)
dt
## treatment Pser GCKR change FC
## 1: PS 12.016 48.824 0.246108471 2.45477602
## 2: PS 49.113 58.998 0.832451948 1.31713003
## 3: PS 41.060 42.118 0.974880099 1.17892126
## 4: PS 5.438 59.996 0.090639376 2.27371849
## 5: PS 46.124 62.054 0.743288104 1.88272650
## 6: PS 41.444 45.076 0.919424971 1.74494311
## 7: OS 7.100 70.818 0.100256997 1.00000000
## 8: OS 40.314 63.786 0.632019565 1.00000000
## 9: OS 32.284 39.041 0.826925540 1.00000000
## 10: OS 2.590 64.971 0.039863939 1.00000000
## 11: OS 27.343 69.259 0.394793456 1.00000000
## 12: OS 22.186 42.106 0.526908279 1.00000000
## 13: PS_AMPK 0.086 58.744 0.001463979 0.01460227
## 14: PS_AMPK 0.564 65.404 0.008623326 0.01364408
## 15: PS_AMPK 1.650 38.150 0.043250328 0.05230257
## 16: PS_AMPK 0.096 56.107 0.001711016 0.04292141
## 17: PS_AMPK 0.888 73.098 0.012148075 0.03077071
## 18: PS_AMPK 0.410 29.500 0.013898305 0.02637709
dt$treatment <- factor(dt$treatment, levels= c("PS", "OS", "PS_AMPK"),
labels = c("PS", "OS", "PS_AMPK"))
my_comparisons <- list( c("PS", "OS"), c("PS", "PS_AMPK"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 4))
p
########################################
#### GCKR mutant coIP western blot #####
########################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "GCKR_mutant_coIP",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: GCKR_mutant_coIP HK S 70.381 83.491 13.110
## 2: GCKR_mutant_coIP HK A 79.620 83.491 3.871
## 3: GCKR_mutant_coIP HK D 59.240 83.491 24.251
## 4: GCKR_mutant_coIP GCKR S 51.287 83.491 32.204
## 5: GCKR_mutant_coIP GCKR A 50.387 83.491 33.104
## 6: GCKR_mutant_coIP GCKR D 44.407 83.491 39.084
## 7: GCKR_mutant_coIP HK S 54.659 85.253 30.594
## 8: GCKR_mutant_coIP HK A 69.719 85.253 15.534
## 9: GCKR_mutant_coIP HK D 42.334 85.253 42.919
## 10: GCKR_mutant_coIP GCKR S 35.942 85.253 49.311
## 11: GCKR_mutant_coIP GCKR A 40.848 85.253 44.405
## 12: GCKR_mutant_coIP GCKR D 43.546 85.253 41.707
## 13: GCKR_mutant_coIP HK S 92.437 102.204 9.767
## 14: GCKR_mutant_coIP HK A 98.729 102.204 3.475
## 15: GCKR_mutant_coIP HK D 84.569 102.204 17.635
## 16: GCKR_mutant_coIP GCKR S 35.990 102.204 66.214
## 17: GCKR_mutant_coIP GCKR A 39.192 102.204 63.012
## 18: GCKR_mutant_coIP GCKR D 37.628 102.204 64.576
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
HK = DATA[DATA$blot == "HK",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$HK/dt$GCKR
dt
## treatment HK GCKR change
## 1: S 13.110 32.204 0.40709229
## 2: A 3.871 33.104 0.11693451
## 3: D 24.251 39.084 0.62048409
## 4: S 30.594 49.311 0.62042952
## 5: A 15.534 44.405 0.34982547
## 6: D 42.919 41.707 1.02905987
## 7: S 9.767 66.214 0.14750657
## 8: A 3.475 63.012 0.05514823
## 9: D 17.635 64.576 0.27308907
dt <- rbind(dt[dt$treatment == "S"], dt[dt$treatment == "A"],
dt[dt$treatment == "D"])
dt$FC <- c(rep(1, 3),
(dt[dt$treatment == "A"]$change/dt[dt$treatment == "S"]$change),
(dt[dt$treatment == "D"]$change/dt[dt$treatment == "S"]$change))
dt
## treatment HK GCKR change FC
## 1: S 13.110 32.204 0.40709229 1.0000000
## 2: S 30.594 49.311 0.62042952 1.0000000
## 3: S 9.767 66.214 0.14750657 1.0000000
## 4: A 3.871 33.104 0.11693451 0.2872432
## 5: A 15.534 44.405 0.34982547 0.5638440
## 6: A 3.475 63.012 0.05514823 0.3738696
## 7: D 24.251 39.084 0.62048409 1.5241853
## 8: D 42.919 41.707 1.02905987 1.6586249
## 9: D 17.635 64.576 0.27308907 1.8513689
dt$treatment <- factor(dt$treatment, levels= c("S", "A", "D"),
labels = c("S", "A", "D"))
my_comparisons <- list( c("S", "A"), c("S", "D"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
##################################
#### AMPK pGCKR western blot #####
##################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "AMPK_pGCKR",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: AMPK_pGCKR P-GCKR WT 128.746 177.842 49.096
## 2: AMPK_pGCKR P-GCKR WT 105.693 177.842 72.149
## 3: AMPK_pGCKR P-GCKR WT 67.933 177.842 109.909
## 4: AMPK_pGCKR P-GCKR KO 151.349 177.842 26.493
## 5: AMPK_pGCKR P-GCKR KO 143.694 177.842 34.148
## 6: AMPK_pGCKR P-GCKR KO 147.092 177.842 30.750
## 7: AMPK_pGCKR GCKR WT 66.503 177.842 111.339
## 8: AMPK_pGCKR GCKR WT 63.630 177.842 114.212
## 9: AMPK_pGCKR GCKR WT 49.551 177.842 128.291
## 10: AMPK_pGCKR GCKR KO 51.076 177.842 126.766
## 11: AMPK_pGCKR GCKR KO 52.082 177.842 125.760
## 12: AMPK_pGCKR GCKR KO 61.922 177.842 115.920
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
pGCKR = DATA[DATA$blot == "P-GCKR",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$pGCKR/dt$GCKR
dt <- rbind(dt[dt$treatment == "WT"], dt[dt$treatment == "KO"])
dt$FC <- c((dt[dt$treatment == 'WT']$change/dt[dt$treatment == 'KO']$change), rep(1, 3))
dt
## treatment pGCKR GCKR change FC
## 1: WT 49.096 111.339 0.4409596 2.109942
## 2: WT 72.149 114.212 0.6317112 2.326461
## 3: WT 109.909 128.291 0.8567164 3.229612
## 4: KO 26.493 126.766 0.2089914 1.000000
## 5: KO 34.148 125.760 0.2715331 1.000000
## 6: KO 30.750 115.920 0.2652692 1.000000
dt$treatment <- factor(dt$treatment, levels= c("WT", "KO"),labels = c("WT", "KO"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 4))
p
######################################
#### AMPK GCKR coIP western blot #####
######################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "AMPK_GCKR_coIP",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: AMPK_GCKR_coIP HK WT 84.105 216.503 132.398
## 2: AMPK_GCKR_coIP HK WT 147.082 216.503 69.421
## 3: AMPK_GCKR_coIP HK WT 91.894 216.503 124.609
## 4: AMPK_GCKR_coIP HK KO 123.395 216.503 93.108
## 5: AMPK_GCKR_coIP HK KO 133.132 216.503 83.371
## 6: AMPK_GCKR_coIP HK KO 116.167 216.503 100.336
## 7: AMPK_GCKR_coIP GCKR WT 182.944 216.503 33.559
## 8: AMPK_GCKR_coIP GCKR WT 186.982 216.503 29.521
## 9: AMPK_GCKR_coIP GCKR WT 187.811 216.503 28.692
## 10: AMPK_GCKR_coIP GCKR KO 169.464 216.503 47.039
## 11: AMPK_GCKR_coIP GCKR KO 143.176 216.503 73.327
## 12: AMPK_GCKR_coIP GCKR KO 176.633 216.503 39.870
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
HK = DATA[DATA$blot == "HK",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$HK/dt$GCKR
dt <- rbind(dt[dt$treatment == "WT"], dt[dt$treatment == "KO"])
dt$FC <- c((dt[dt$treatment == 'WT']$change/dt[dt$treatment == 'KO']$change), rep(1, 3))
dt
## treatment HK GCKR change FC
## 1: WT 132.398 33.559 3.945231 1.993166
## 2: WT 69.421 29.521 2.351580 2.068277
## 3: WT 124.609 28.692 4.342988 1.725751
## 4: KO 93.108 47.039 1.979379 1.000000
## 5: KO 83.371 73.327 1.136975 1.000000
## 6: KO 100.336 39.870 2.516579 1.000000
dt$treatment <- factor(dt$treatment, levels= c("WT", "KO"),labels = c("WT", "KO"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "HR_mice",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("HR", mea$group),]$group, split = "@")); name <- spl[!grepl("HR", spl)]
DT <- data.table(Gene = name,
HR = mea[grep("HR", mea$group),]$diff,
LR = mea[grep("LR", mea$group),]$diff,
FC = 2^-(mea[grep("HR", mea$group),]$diff-mea[grep("LR", mea$group),]$diff))
# Take means
DT[, means := mean(FC), by = Gene]
DT$log2FC <- log2(DT$FC)
DT
## Gene HR LR FC means log2FC
## 1: HK1 2.953333 2.0200000 0.5236471 0.5455471 -0.9333333
## 2: HK1 2.526667 1.7366667 0.5783441 0.5455471 -0.7900000
## 3: HK1 3.106667 2.2033333 0.5346500 0.5455471 -0.9033333
## 4: GPI1 5.303333 4.1433333 0.4475125 0.4196309 -1.1600000
## 5: GPI1 5.646667 4.3266667 0.4005349 0.4196309 -1.3200000
## 6: GPI1 5.613333 4.3300000 0.4108452 0.4196309 -1.2833333
## 7: PFKM 4.563333 3.4366667 0.4579726 0.4508057 -1.1266667
## 8: PFKM 4.750000 3.4766667 0.4137028 0.4508057 -1.2733333
## 9: PFKM 3.533333 2.4766667 0.4807415 0.4508057 -1.0566667
## 10: ALDOA 2.026667 0.2733333 0.2966157 0.2881635 -1.7533333
## 11: ALDOA 2.976667 1.1900000 0.2898409 0.2881635 -1.7866667
## 12: ALDOA 5.330000 3.4833333 0.2780340 0.2881635 -1.8466667
## 13: TPI1 3.973333 2.1166667 0.2761135 0.2578421 -1.8566667
## 14: TPI1 3.813333 1.6700000 0.2263562 0.2578421 -2.1433333
## 15: TPI1 5.320000 3.4366667 0.2710567 0.2578421 -1.8833333
## 16: GAPDH 3.240000 2.4166667 0.5651347 0.5403956 -0.8233333
## 17: GAPDH 2.683333 1.7933333 0.5396141 0.5403956 -0.8900000
## 18: GAPDH 2.470000 1.5166667 0.5164379 0.5403956 -0.9533333
## 19: PGK1 5.993333 4.1366667 0.2761135 0.2462456 -1.8566667
## 20: PGK1 5.803333 3.7800000 0.2459892 0.2462456 -2.0233333
## 21: PGK1 7.220000 5.0133333 0.2166343 0.2462456 -2.2066667
## 22: PGAM1 6.546667 4.7600000 0.2898409 0.2650165 -1.7866667
## 23: PGAM1 5.883333 3.7033333 0.2206757 0.2650165 -2.1800000
## 24: PGAM1 6.313333 4.5000000 0.2845328 0.2650165 -1.8133333
## 25: ENO3 3.726667 1.6666667 0.2398160 0.2510840 -2.0600000
## 26: ENO3 5.706667 3.9566667 0.2973018 0.2510840 -1.7500000
## 27: ENO3 4.240000 2.0300000 0.2161343 0.2510840 -2.2100000
## 28: PKM2 4.043333 2.1833333 0.2754763 0.2898042 -1.8600000
## 29: PKM2 4.100000 2.3366667 0.2945668 0.2898042 -1.7633333
## 30: PKM2 4.410000 2.6700000 0.2993697 0.2898042 -1.7400000
## 31: LDHA 3.596667 1.7333333 0.2748405 0.2651554 -1.8633333
## 32: LDHA 3.910000 1.8433333 0.2387104 0.2651554 -2.0666667
## 33: LDHA 3.543333 1.7166667 0.2819152 0.2651554 -1.8266667
## Gene HR LR FC means log2FC
DT$Gene <- factor(DT$Gene, levels= c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"),labels = c("HK1", "GPI1", "PFKM", "ALDOA", "TPI1", "GAPDH", "PGK1", "PGAM1", "ENO3", "PKM2", "LDHA"))
# my_comparisons <- list( c("hr2", "hr8"), c("hr8", "hr16"), c("hr2", "hr16") )
p<- ggplot(DT, aes(x=Gene, y=log2FC)) + # , fill=Gene
# geom_violin(width = 1.8, fill = c("#1B9E77"))+ #, "#D95F02", "#7570B3"
# # scale_fill_brewer(palette="Dark2")+
geom_boxplot(fill = c("#1B9E77"), width=0.9)+
# geom_bar(stat="identity") + #fill="skyblue", alpha=0.5
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black", fill = "black")+
theme_pubr()#+
# ylim(c(0, 2))
p
######################################
#### HR/LR KLF4 binding GCKR qPCR ####
######################################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "HRLR_KLF4_binding",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("HR", mea$group),]$group, split = "_")); name <- spl[grepl("HR", spl)]
name <- gsub("@HR", "", name)
HR = mea[grep("HR", mea$group),]$diff
LR = mea[grep("LR", mea$group),]$diff
stimulation <- c(rep("HR", length(HR)))#, rep("NULL", length(NUL)))
# static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
HR = HR,
LR = LR,
FC = 2^-(c(HR)-LR))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation HR LR FC means
## 1: GCKR HR 8.956667 10.13333 2.260539 2.539033
## 2: GCKR HR 10.660000 11.94000 2.428390 2.539033
## 3: GCKR HR 12.536667 14.08667 2.928171 2.539033
DT2 = data.table(Gene = rep(DT$Gene[1], 6),
stimulation = c(DT$stimulation, rep("LR", 3)),
FC = c(DT$FC, rep(1, 3)))
DT2
## Gene stimulation FC
## 1: GCKR HR 2.260539
## 2: GCKR HR 2.428390
## 3: GCKR HR 2.928171
## 4: GCKR LR 1.000000
## 5: GCKR LR 1.000000
## 6: GCKR LR 1.000000
DT2$stimulation <- factor(DT2$stimulation, levels= c("LR", "HR"),labels = c("LR", "HR"))
p<- ggplot(DT2, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
#########################
#### HR/LR GCKR qPCR ####
#########################
DATA <- fread("./results/99 qPCR data.csv", sep=",")
DATA$group <- paste(DATA$Gene, DATA$Tratment, sep = "@")
DATA <- DATA[DATA$Experiment == "HRLR",]
#### Take data averages ####
mea <- DATA[ ,.(mean_actin = mean(Actin_CTRL_Cq), mean_gene = mean(Cq_value)), by = group]
#### take differences between averages ####
mea$diff <- mea$mean_gene - mea$mean_actin
#### compute fold changes ####
spl <- unlist(strsplit(mea[grep("HR", mea$group),]$group, split = "_")); name <- spl[grepl("HR", spl)]
name <- gsub("@HR", "", name)
HR = mea[grep("HR", mea$group),]$diff
LR = mea[grep("LR", mea$group),]$diff
stimulation <- c(rep("HR", length(HR)))#, rep("NULL", length(NUL)))
# static <- rep(mea[grep("static", mea$group),]$diff, (length(PS)/3))
DT <- data.table(Gene = name,
stimulation = stimulation,
HR = HR,
LR = LR,
FC = 2^-(c(HR)-LR))
# Take means
DT[, means := mean(FC), by = Gene]
DT
## Gene stimulation HR LR FC means
## 1: GCKR HR 4.553333 5.746667 2.286805 2.332484
## 2: GCKR HR 11.540000 12.826667 2.439637 2.332484
## 3: GCKR HR 9.110000 10.293333 2.271009 2.332484
DT2 = data.table(Gene = rep(DT$Gene[1], 6),
stimulation = c(DT$stimulation, rep("LR", 3)),
FC = c(DT$FC, rep(1, 3)))
DT2
## Gene stimulation FC
## 1: GCKR HR 2.286805
## 2: GCKR HR 2.439637
## 3: GCKR HR 2.271009
## 4: GCKR LR 1.000000
## 5: GCKR LR 1.000000
## 6: GCKR LR 1.000000
DT2$stimulation <- factor(DT2$stimulation, levels= c("LR", "HR"),labels = c("LR", "HR"))
p<- ggplot(DT2, aes(x=stimulation, y=FC, fill=stimulation)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
######################################
#### HRLR abundance western blot #####
######################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "HRLR_abundance",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: HRLR_abundance GCKR HR 70.879 111.913 41.034
## 2: HRLR_abundance GCKR HR 66.460 111.913 45.453
## 3: HRLR_abundance GCKR HR 57.812 111.913 54.101
## 4: HRLR_abundance GCKR LR 67.844 111.913 44.069
## 5: HRLR_abundance GCKR LR 78.632 111.913 33.281
## 6: HRLR_abundance GCKR LR 79.547 111.913 32.366
## 7: HRLR_abundance KLF4 HR 90.602 111.913 21.311
## 8: HRLR_abundance KLF4 HR 77.051 111.913 34.862
## 9: HRLR_abundance KLF4 HR 75.559 111.913 36.354
## 10: HRLR_abundance KLF4 LR 90.077 111.913 21.836
## 11: HRLR_abundance KLF4 LR 85.354 111.913 26.559
## 12: HRLR_abundance KLF4 LR 88.851 111.913 23.062
## 13: HRLR_abundance Actin HR 77.456 111.913 34.457
## 14: HRLR_abundance Actin HR 65.084 111.913 46.829
## 15: HRLR_abundance Actin HR 72.728 111.913 39.185
## 16: HRLR_abundance Actin LR 61.212 111.913 50.701
## 17: HRLR_abundance Actin LR 64.621 111.913 47.292
## 18: HRLR_abundance Actin LR 74.105 111.913 37.808
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
gene = c("GCKR","GCKR","GCKR","GCKR","GCKR","GCKR","KLF4", "KLF4","KLF4","KLF4","KLF4","KLF4"),
gene_level = DATA[DATA$blot == "GCKR" | DATA$blot == "KLF4",]$difference,
Actin = c(DATA[DATA$blot == "Actin",]$difference, DATA[DATA$blot == "Actin",]$difference))
dt$change <- dt$gene_level/dt$Actin
dt$label <- paste(dt$gene, dt$treatment, sep = "_")
dt <- rbind(dt[dt$label == "GCKR_HR"], dt[dt$label == "GCKR_LR"],
dt[dt$label == "KLF4_HR"], dt[dt$label == "KLF4_LR"])
dt$FC <- c((dt[dt$label == 'GCKR_HR']$change/dt[dt$label == 'GCKR_LR']$change),
rep(1, 3),
(dt[dt$label == 'KLF4_HR']$change/dt[dt$label == 'KLF4_LR']$change),
rep(1, 3))
dt
## treatment gene gene_level Actin change label FC
## 1: HR GCKR 41.034 34.457 1.1908756 GCKR_HR 1.370092
## 2: HR GCKR 45.453 46.829 0.9706165 GCKR_HR 1.379237
## 3: HR GCKR 54.101 39.185 1.3806559 GCKR_HR 1.612799
## 4: LR GCKR 44.069 50.701 0.8691939 GCKR_LR 1.000000
## 5: LR GCKR 33.281 47.292 0.7037342 GCKR_LR 1.000000
## 6: LR GCKR 32.366 37.808 0.8560622 GCKR_LR 1.000000
## 7: HR KLF4 21.311 34.457 0.6184810 KLF4_HR 1.436051
## 8: HR KLF4 34.862 46.829 0.7444532 KLF4_HR 1.325603
## 9: HR KLF4 36.354 39.185 0.9277530 KLF4_HR 1.520965
## 10: LR KLF4 21.836 50.701 0.4306818 KLF4_LR 1.000000
## 11: LR KLF4 26.559 47.292 0.5615960 KLF4_LR 1.000000
## 12: LR KLF4 23.062 37.808 0.6099767 KLF4_LR 1.000000
dt$treatment <- factor(dt$treatment, levels= c("GCKR_HR", "GCKR_LR"),labels = c("KLF4_HR", "KLF4_LR"))
my_comparisons <- list( c("GCKR_HR", "GCKR_LR"), c("KLF4_HR", "KLF4_LR"))
p<- ggplot(dt, aes(x=label, y=FC, fill=label)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test", comparisons = my_comparisons)+ # Add pairwise comparisons p-value
ylim(c(0, 2))
p
##################################
#### HRLR pGCKR western blot #####
##################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "HRLR_pGCKR",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: HRLR_pGCKR p-GCKR HR 70.249 114.878 44.629
## 2: HRLR_pGCKR p-GCKR HR 58.282 114.878 56.596
## 3: HRLR_pGCKR p-GCKR HR 28.956 114.878 85.922
## 4: HRLR_pGCKR p-GCKR LR 81.660 114.878 33.218
## 5: HRLR_pGCKR p-GCKR LR 86.154 114.878 28.724
## 6: HRLR_pGCKR p-GCKR LR 82.840 114.878 32.038
## 7: HRLR_pGCKR GCKR HR 79.302 114.878 35.576
## 8: HRLR_pGCKR GCKR HR 79.437 114.878 35.441
## 9: HRLR_pGCKR GCKR HR 71.434 114.878 43.444
## 10: HRLR_pGCKR GCKR LR 68.057 114.878 46.821
## 11: HRLR_pGCKR GCKR LR 69.822 114.878 45.056
## 12: HRLR_pGCKR GCKR LR 72.659 114.878 42.219
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
pGCKR = DATA[DATA$blot == "p-GCKR",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$pGCKR/dt$GCKR
dt
## treatment pGCKR GCKR change
## 1: HR 44.629 35.576 1.2544693
## 2: HR 56.596 35.441 1.5969075
## 3: HR 85.922 43.444 1.9777645
## 4: LR 33.218 46.821 0.7094680
## 5: LR 28.724 45.056 0.6375178
## 6: LR 32.038 42.219 0.7588526
# DT_graph <- melt(norm_raw, id.vars = "Gene", measure.vars = c("norm_HR", "norm_LR")); DT_graph
# dt$treatment <- factor(dt$treatment, levels= c("WT", "KO"),labels = c("WT", "KO"))
p<- ggplot(dt, aes(x=treatment, y=change, fill=treatment)) + #
# geom_violin()+ #, "#D95F02", "#7570B3" width = 1.8, fill = c("#1B9E77")
geom_boxplot()+ #fill = c("#1B9E77"), width=0.9
# geom_bar(stat="identity", position=position_dodge()) +
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 2.5))
p
####################################
#### HRLR HK GCKR western blot #####
####################################
DATA <- fread("./results/99 western quantification2.csv", sep=",")
DATA <- DATA[DATA$experiment == "HRLR_HK_GCKR",]
#### take differences between background and densitometry value ####
DATA$difference <- DATA$background - DATA$densitometry
DATA
## experiment blot treatment densitometry background difference
## 1: HRLR_HK_GCKR HK HR 70.397 119.252 48.855
## 2: HRLR_HK_GCKR HK HR 75.411 119.252 43.841
## 3: HRLR_HK_GCKR HK HR 68.181 119.252 51.071
## 4: HRLR_HK_GCKR HK LR 84.497 119.252 34.755
## 5: HRLR_HK_GCKR HK LR 88.058 119.252 31.194
## 6: HRLR_HK_GCKR HK LR 91.543 119.252 27.709
## 7: HRLR_HK_GCKR GCKR HR 79.794 119.252 39.458
## 8: HRLR_HK_GCKR GCKR HR 78.736 119.252 40.516
## 9: HRLR_HK_GCKR GCKR HR 83.041 119.252 36.211
## 10: HRLR_HK_GCKR GCKR LR 75.625 119.252 43.627
## 11: HRLR_HK_GCKR GCKR LR 59.903 119.252 59.349
## 12: HRLR_HK_GCKR GCKR LR 77.710 119.252 41.542
#### Construct data table
dt <- data.table(treatment = DATA[DATA$blot == "GCKR",]$treatment,
HK = DATA[DATA$blot == "HK",]$difference,
GCKR = DATA[DATA$blot == "GCKR",]$difference)
dt$change <- dt$HK/dt$GCKR
dt
## treatment HK GCKR change
## 1: HR 48.855 39.458 1.2381520
## 2: HR 43.841 40.516 1.0820663
## 3: HR 51.071 36.211 1.4103725
## 4: LR 34.755 43.627 0.7966397
## 5: LR 31.194 59.349 0.5256028
## 6: LR 27.709 41.542 0.6670117
dt <- rbind(dt[dt$treatment == "HR"], dt[dt$treatment == "LR"])
dt$FC <- c((dt[dt$treatment == 'HR']$change/dt[dt$treatment == 'LR']$change), rep(1, 3))
dt
## treatment HK GCKR change FC
## 1: HR 48.855 39.458 1.2381520 1.554218
## 2: HR 43.841 40.516 1.0820663 2.058715
## 3: HR 51.071 36.211 1.4103725 2.114464
## 4: LR 34.755 43.627 0.7966397 1.000000
## 5: LR 31.194 59.349 0.5256028 1.000000
## 6: LR 27.709 41.542 0.6670117 1.000000
dt$treatment <- factor(dt$treatment, levels= c("HR", "LR"),labels = c("HR", "LR"))
p<- ggplot(dt, aes(x=treatment, y=FC, fill=treatment)) +
geom_boxplot()+
scale_fill_brewer(palette="Dark2")+
geom_point(alpha = 0.9, shape = 21, size = 3, colour = "black",
position = position_dodge2(preserve = "single", width = 0.7))+ #, fill = "black"
theme_pubr()+
stat_compare_means(method = "t.test")+ # Add pairwise comparisons p-value
ylim(c(0, 3))
p
DT <- fread("./results/fig. X HR mice hexokinase activity assay.csv")
library(RColorBrewer)
brewer.pal(9, "Reds")
## [1] "#FFF5F0" "#FEE0D2" "#FCBBA1" "#FC9272" "#FB6A4A" "#EF3B2C" "#CB181D" "#A50F15" "#67000D"
brewer.pal(9, "Blues")
## [1] "#F7FBFF" "#DEEBF7" "#C6DBEF" "#9ECAE1" "#6BAED6" "#4292C6" "#2171B5" "#08519C" "#08306B"
p <- ggviolin(DT, x = "group", y = "Binding", fill = "group",
palette = c("#2171B5", "#A50F15"), #"npg",
add = "boxplot", add.params = list(fill = "white"));p