############################### input data
# NOTE: the workspace is intentionally NOT cleared here. `rm(list = ls())`
# deletes the caller's objects but does not reset loaded packages or options;
# start a fresh R session if a clean environment is required.

# Folder holding the input CSV (machine-specific absolute path).
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# Files directly inside that folder (non-recursive) whose name ends in ".csv".
dir_path_name <- list.files(
  path = dir_path, pattern = "\\.csv$",
  full.names = TRUE, recursive = FALSE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\cpe_tox.csv"

# Select the input by exact file name rather than by regex, so that look-alike
# files cannot match and read.csv() always receives a single path.
input_file <- dir_path_name[basename(dir_path_name) == "cpe_tox.csv"]
if (length(input_file) != 1L) {
  stop(
    "Expected exactly one 'cpe_tox.csv' in ", dir_path,
    " but found ", length(input_file), ".",
    call. = FALSE
  )
}
data_1 <- read.csv(input_file, header = TRUE, stringsAsFactors = FALSE)
#dim(data_1) #[1] 122 10
#colnames(data_1)

# Keep the compound ID, the two AC50 columns and the sample name.
data_1 <- data_1[, c("Mapping.ID", "AC50..uM._CPE", "AC50..uM._tox", "Sample.Name")]
#head(data_1)

# Work on the first 10 rows only; head() does not pad with NA rows when the
# file is shorter than that.
data_1 <- head(data_1, 10)

# Recode Sample.Name into a flag stored as text: "1" when a sample name is
# present, "0" when it is empty. NA entries stay NA.
data_1$Sample.Name <- ifelse(data_1$Sample.Name != "", "1", "0")

# Plot order of the compounds: flagged ("1") compounds first, then the rest.
order_1 <- c(
  data_1$Mapping.ID[data_1$Sample.Name == "1"],
  data_1$Mapping.ID[data_1$Sample.Name == "0"]
)
#table(data_1$Sample.Name) # 0 1 67 55
# Number of flagged compounds among the rows kept.
length(data_1$Mapping.ID[data_1$Sample.Name == "1"])
## [1] 2
#########################################
#head(data_1)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Reshape to long format: one row per (compound, assay) pair, with the source
# column name in `key` and the AC50 value in `value`.
# pivot_longer() replaces the superseded gather(); the columns to keep are
# named explicitly instead of being addressed by position.
# arrange(key) restores the column-by-column row order gather() produced, and
# as.data.frame() keeps the plain data.frame class, so the printed output and
# row numbers recorded below stay valid.
data_2 <- data_1 %>%
  pivot_longer(
    cols = -c(Mapping.ID, Sample.Name),
    names_to = "key",
    values_to = "value"
  ) %>%
  arrange(key) %>%
  as.data.frame()
data_2
## Mapping.ID Sample.Name key value
## 1 AHUYPYUXDNUFDG 0 AC50..uM._CPE 4.466836
## 2 AJZJIYUOOJLBAU 1 AC50..uM._CPE 12.589254
## 3 ARAATGYGKVFJCT 0 AC50..uM._CPE 15.848932
## 4 AYMYIKSUYDGIEW 0 AC50..uM._CPE 12.589254
## 5 BEIGZMUAVKDCQI 0 AC50..uM._CPE 14.125375
## 6 BSWDFNCVXNVPFQ 0 AC50..uM._CPE 11.220185
## 7 BZKSQQXINBVDCB 0 AC50..uM._CPE 12.589254
## 8 CLDIUVXCUVQLGD 1 AC50..uM._CPE 11.220185
## 9 CMLVHSVFSYNMGM 0 AC50..uM._CPE 12.589254
## 10 CMRBIFUJNTUGED 0 AC50..uM._CPE 10.000000
## 11 AHUYPYUXDNUFDG 0 AC50..uM._tox 8.912509
## 12 AJZJIYUOOJLBAU 1 AC50..uM._tox NA
## 13 ARAATGYGKVFJCT 0 AC50..uM._tox NA
## 14 AYMYIKSUYDGIEW 0 AC50..uM._tox NA
## 15 BEIGZMUAVKDCQI 0 AC50..uM._tox 12.589254
## 16 BSWDFNCVXNVPFQ 0 AC50..uM._tox NA
## 17 BZKSQQXINBVDCB 0 AC50..uM._tox NA
## 18 CLDIUVXCUVQLGD 1 AC50..uM._tox 8.971641
## 19 CMLVHSVFSYNMGM 0 AC50..uM._tox NA
## 20 CMRBIFUJNTUGED 0 AC50..uM._tox NA
#head(data_2)

# Shorten the assay labels: drop the "AC50..uM._" prefix and upper-case "tox".
# fixed = TRUE matches the text literally, so the dots in the prefix are not
# treated as regex wildcards.
data_2$key <- sub("AC50..uM._", "", data_2$key, fixed = TRUE)
#unique(data_2$key)
data_2$key <- sub("tox", "TOX", data_2$key, fixed = TRUE)
#########################################
# Quick look at the long table after the assay labels were cleaned up.
head(data_2)
## Mapping.ID Sample.Name key value
## 1 AHUYPYUXDNUFDG 0 CPE 4.466836
## 2 AJZJIYUOOJLBAU 1 CPE 12.589254
## 3 ARAATGYGKVFJCT 0 CPE 15.848932
## 4 AYMYIKSUYDGIEW 0 CPE 12.589254
## 5 BEIGZMUAVKDCQI 0 CPE 14.125375
## 6 BSWDFNCVXNVPFQ 0 CPE 11.220185

# Turn the compound ID into a factor whose level order is order_1, and the
# assay label into a factor so it can later be mapped to a numeric position.
data_2[["Mapping.ID"]] <- factor(data_2[["Mapping.ID"]], levels = order_1)
data_2[["key"]] <- factor(data_2[["key"]])
# Number of distinct assays.
row_num_1 <- length(unique(data_2[["key"]]))
str(data_2)
## 'data.frame': 20 obs. of 4 variables:
## $ Mapping.ID : Factor w/ 10 levels "AJZJIYUOOJLBAU",..: 3 1 4 5 6 7 8 2 9 10 ...
## $ Sample.Name: chr "0" "1" "0" "0" ...
## $ key : Factor w/ 2 levels "CPE","TOX": 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 4.47 12.59 15.85 12.59 14.13 ...
# Integer codes of the assay factor.
as.numeric(data_2[["key"]])
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
dim(data_2)
## [1] 20 4
#view(data_2)

# Scale the AC50 values by 1e-6 (column names say uM, so this is presumably
# uM -> M) and take log10.
data_2[["value"]] <- log10(data_2[["value"]] * 1e-6)

# Sanity check: IDs and rows of the compounds flagged with Sample.Name == 1.
with(data_2, unique(Mapping.ID[Sample.Name == 1]))
## [1] AJZJIYUOOJLBAU CLDIUVXCUVQLGD
## 10 Levels: AJZJIYUOOJLBAU CLDIUVXCUVQLGD AHUYPYUXDNUFDG ... CMRBIFUJNTUGED
data_2[data_2[["Sample.Name"]] == 1, ]
## Mapping.ID Sample.Name key value
## 2 AJZJIYUOOJLBAU 1 CPE -4.900000
## 8 CLDIUVXCUVQLGD 1 CPE -4.950000
## 12 AJZJIYUOOJLBAU 1 TOX NA
## 18 CLDIUVXCUVQLGD 1 TOX -5.047128
#AJZJIYUOOJLBAU CLDIUVXCUVQLGD
# p1: rectangular heat map, one tile per (compound, assay) pair.
#  - x: compounds in order_1, preceded by a dummy slot "1" that only carries
#    the assay labels.
#  - y: numeric code of the assay factor. The y range extends below the first
#    row and above the last one.
#  - fill: log10 AC50 on a reversed gradient; missing values are drawn grey.
p1 <- ggplot(
  data = data_2,
  mapping = aes(x = Mapping.ID, y = as.numeric(key), fill = value)
) +
  geom_tile(color = "white") +
  annotate(
    "text",
    x = "1", y = 1:row_num_1,
    label = levels(data_2$key), size = 2.5
  ) +
  scale_x_discrete(limits = c("1", order_1)) +
  scale_y_continuous(limits = c(1 - row_num_1, row_num_1 + 1)) +
  scale_fill_gradient(
    name = expression(paste("Log(",AC[50],", M)")),
    low = "skyblue", high = "darkred",
    na.value = "grey90", trans = "reverse"
  ) +
  labs(y = "")
# Show the rectangular version with rotated axis text.
p1 + theme(axis.text = element_text(angle = 90, hjust = 1))

# p2: the same plot in polar coordinates with an empty theme.
p2 <- p1 +
  coord_polar(theta = "x", start = -0.15) +
  theme_void()
p2

# p3: add two arcs at y = 2.7 marking which compounds are "known"
# (Sample.Name == 1) and which are "unknown".
#
# The arcs get their own two-row data frame and do not inherit the plot
# aesthetics. Mapping constants inside aes() on a layer that inherits data_2
# would draw every arc once per row of data_2, stacked on top of each other.
#
# x positions: slot 1 of the discrete x axis is the dummy label column "1",
# and order_1 lists the known compounds first, so the known compounds start
# at the left edge of slot 2 (x = 1.5) and the unknown ones follow directly.
n_known <- length(data_1$Mapping.ID[data_1$Sample.Name == 1])
n_total <- length(unique(data_2$Mapping.ID))
category_arcs <- data.frame(
  category = c("known", "unknown"),
  x = c(1.5, n_known + 1.5),
  xend = c(n_known + 1.5, n_total + 1.5),
  y = 2.7,
  yend = 2.7
)
p3 <- p2 +
  geom_segment(
    data = category_arcs,
    mapping = aes(x = x, y = y, xend = xend, yend = yend, color = category),
    inherit.aes = FALSE,
    size = 1
  )
p3

# p4: final figure. Colour the category arcs, title their legend "Category",
# put the legends at the centre of the plot and remove all axis decoration.
p4 <- p3 +
  scale_colour_manual(
    name = "Category",
    values = c("lightgoldenrod", "lightgreen"),
    labels = c("known", "unknown")
  ) +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    # c(0.5, 0.5) is the middle of the plotting area.
    legend.position = c(0.5, 0.5),
    legend.box = "vertical",
    legend.key.size = unit(0.3, "cm"),
    #legend.title = expression(paste("Log (",AC[50],")")),
    legend.title = element_text(size = 9)
  )
p4

#p4 + guides(colour = guide_colourbar(order = 1))
#guides(fill = guide_legend(order = 1))
###########################output
# Write the final figure (p4) to the working directory as an LZW-compressed
# 300 dpi TIFF and as a PDF. The plot is passed explicitly instead of relying
# on last_plot(), which depends on which plot was last created or displayed.
# NOTE(review): the file names contain a double dash ("<date>--cpe.*"); kept
# unchanged in case something downstream expects it -- confirm if intended.
# NOTE(review): the PDF is 15 x 25 cm while the TIFF is 13 x 13 cm -- confirm
# the non-square PDF page is intended for this circular plot.
date_stamp <- Sys.Date() # one stamp so both files always share the same date
ggsave(
  filename = paste0(date_stamp, "-", "-cpe.tif"),
  plot = p4, device = "tiff", path = NULL,
  width = 13, height = 13, units = "cm",
  dpi = 300, limitsize = TRUE, compression = "lzw"
)
ggsave(
  filename = paste0(date_stamp, "-", "-cpe.pdf"),
  plot = p4, device = "pdf", path = NULL,
  width = 15, height = 25, units = "cm"
)