rm(list = ls())
###############################input data
Sys.setlocale("LC_ALL", "C")
## [1] "C"
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\chromosome\\"
dir_path_name <- dir(dir_path,pattern = "2019-01-23-gene_list_chromosone_location.csv")
#dir_path_name
##############################signal input
dir_path_file <- paste0(dir_path,dir_path_name)
#getSheetNames(dir_path_file)
object_file <- read.csv(dir_path_file,header = T,stringsAsFactors = F)
dim(object_file) #[1] 8190 9
## [1] 8190 9
#colnames(object_file)
#object_file[is.na(object_file$CHRLOCEND),]
#head(object_file)
#length(object_file$ENTREZID[object_file$ANOVAP.VALUE.TCDDVSDMSO. < 0.05])
#length(unique(object_file$ENTREZID)) #[1] 4377
#object_file <- object_file[!duplicated(object_file$ENTREZID),]
#dim(object_file);head(object_file);table(object_file$CHRLOCCHR)
###################################################
library(RIdeogram)
###########1
data(human_karyotype, package="RIdeogram")
#colnames(object_file)
dim(human_karyotype); head(human_karyotype)
## [1] 24 5
## Chr Start End CE_start CE_end
## 1 1 0 248956422 122026459 124932724
## 2 2 0 242193529 92188145 94090557
## 3 3 0 198295559 90772458 93655574
## 4 4 0 190214555 49712061 51743951
## 5 5 0 181538259 46485900 50059807
## 6 6 0 170805979 58553888 59829934
###########2.1
gene_density_mydata <- object_file[,c("ENTREZID","CHRLOCCHR","CHRLOC","CHRLOCEND","FOLDCHANGE.TCDDVSDMSO.")]
gene_density_mydata <- na.omit(gene_density_mydata)
#dim(gene_density_mydata) #[1] 4371 5
head(gene_density_mydata); dim(gene_density_mydata) #[1] 4377 5
## ENTREZID CHRLOCCHR CHRLOC CHRLOCEND FOLDCHANGE.TCDDVSDMSO.
## 1 100132932 Y 6258332 6263935 -1.08
## 2 100289150 Y 9349579 9355182 -1.12
## 3 100289150 Y 9479052 9484654 -1.12
## 4 100289150 Y 9458742 9464345 -1.12
## 5 100289150 Y 9519665 9525268 -1.12
## 6 100289150 Y 9369856 9375462 -1.12
## [1] 8184 5
##2.2
library(reshape2)
gene_list <- list()
for (i in 1:length(unique(gene_density_mydata$CHRLOCCHR))) {
gene_density_chr <- gene_density_mydata[which(gene_density_mydata$CHRLOCCHR == unique(gene_density_mydata$CHRLOCCHR)[i]),]
a1 <- melt(table(cut(gene_density_chr$CHRLOC,breaks=c(1,seq(1000000,248000000,by =1000000)))))
a2 <- data.frame(sapply(a1,function(x) gsub("\\(|\\]","",gsub("\\,","-",x))))
colnames(a2)<-c("numbers","Freq")
a2$Chr <- unique(gene_density_mydata$CHRLOCCHR)[i]
a2$numbers <- as.character(a2$numbers)
gene_list[[i]] <- a2
}
#str(a2)
#head(a2)
#class(a2)
#View(gene_list)
gene_list_data <- do.call("rbind",gene_list)
head(gene_list_data); dim(gene_list_data) #[1] 5952 3
## numbers Freq Chr
## 1 1-1e+06 0 Y
## 2 1e+06-2e+06 1 Y
## 3 2e+06-3e+06 5 Y
## 4 3e+06-4e+06 0 Y
## 5 4e+06-5e+06 0 Y
## 6 5e+06-6e+06 0 Y
## [1] 5952 3
#View(gene_list_data)
###########
start <- end <- NULL
for (i in 1:nrow(gene_list_data)) {
start <- c(start,unlist(strsplit(gene_list_data$numbers[i],"-"))[1])
end <- c(end,unlist(strsplit(gene_list_data$numbers[i],"-"))[2])
}
gene_list_data$Start <- start
gene_list_data$End <- end
#head(gene_list_data)
colnames(gene_list_data)
## [1] "numbers" "Freq" "Chr" "Start" "End"
gene_list_data_my <- gene_list_data[,c("Chr","Start","End","Freq")]
gene_list_data_my$Start <- as.numeric(gene_list_data_my$Start)
gene_list_data_my$End <- as.numeric(gene_list_data_my$End)
gene_list_data_my$Freq <- as.numeric(as.character(gene_list_data_my$Freq))
#head(gene_list_data_my)
#gene_list_data_my$Start[2]
gene_list_data_my$Chr <- as.character(gene_list_data_my$Chr)
#str(gene_list_data_my)
#dim(gene_list_data_my) #[1] 248 4
gene_list_data_my_1 <- gene_list_data_my[which(gene_list_data_my$Freq != 0),]
#dim(gene_list_data_my_1)
gene_list_data_my_1$Start <- format(gene_list_data_my_1$Start, scientific = FALSE)
gene_list_data_my_1$End <- format(gene_list_data_my_1$End, scientific = FALSE)
#head(gene_list_data_my_1)
#str(gene_list_data_my_1)
gene_list_data_my_1$Start <- as.integer(gene_list_data_my_1$Start)
gene_list_data_my_1$End <- as.integer(gene_list_data_my_1$End)
gene_list_data_my_1$Freq <- as.integer(gene_list_data_my_1$Freq)
colnames(gene_list_data_my_1) <- gsub("Freq","Value",colnames(gene_list_data_my_1))
#str(gene_list_data_my_1)
##########2.3
foldchange_1.5 <- gene_density_mydata[which(abs(gene_density_mydata$FOLDCHANGE.TCDDVSDMSO.) >= 1.5),]
for (i in 1:nrow(foldchange_1.5)) {
if (foldchange_1.5$FOLDCHANGE.TCDDVSDMSO.[i] >= 1.5) {
foldchange_1.5$Type[i] <- "up_regulated genes (>1.5-fold)"
foldchange_1.5$Shape[i] <- "circle"
foldchange_1.5$color[i] <- "FF4500"
}else{foldchange_1.5$Type[i] <- "down_regulated genes (>1.5-fold)"
foldchange_1.5$Shape[i] <- "box"
foldchange_1.5$color[i] <- "7FFF00"
}
}
foldchange_1.5 <- foldchange_1.5[,c("CHRLOCCHR","CHRLOC","CHRLOCEND","Type","Shape","color")]
colnames(foldchange_1.5) <- c("Chr","Start","End","Type","Shape","color")
foldchange_1.5 <- foldchange_1.5[,c("Type","Shape","Chr","Start","End","color")]
###########################plot
setwd("C:\\Users\\liyix\\OneDrive\\Desktop\\")
ideogram(karyotype = human_karyotype, overlaid = gene_list_data_my_1,
label = foldchange_1.5,width = 170,
Lx = 130, Ly = 35,
colorset1 = c("#fc8d59", "#ffffbf", "#91bfdb"),
label_type = "marker")
setwd("C:\\Users\\liyix\\OneDrive\\Desktop\\")
#convertSVG("chromosome.svg", device = "pdf", dpi = 600,width = 12,
# height = 6)
#dim(gene_list_data_my_1)
ideogram(karyotype = human_karyotype, overlaid = gene_list_data_my_1,
label = NULL,width = 170,
Lx = 130, Ly = 35,
colorset1 = c("#fc8d59", "#ffffbf", "#91bfdb"),
label_type = "marker")
convertSVG("chromosome.svg", device = "pdf", dpi = 300,width = 12,
height = 6)
#ref https://github.com/TickingClock1992/RIdeogram/issues/9
# https://cran.r-project.org/web/packages/RIdeogram/vignettes/RIdeogram.html