################################################
# ---- Input data ----
# The former `rm(list = ls())` was dropped: wiping the global environment is a
# surprising side effect for anyone sourcing this script, and nothing below
# depends on an empty workspace.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"

# List every CSV file below dir_path. The anchored pattern only accepts a real
# ".csv" extension instead of any path that merely contains the letters "csv".
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\data.csv"

# Pick the input by exact file name. An unanchored regex such as "data.csv"
# also matches names like "metadata.csv", which would hand read.csv() several
# paths at once.
data_file <- dir_path_name[basename(dir_path_name) == "data.csv"]
if (length(data_file) != 1L) {
  stop(
    "Expected exactly one 'data.csv' under ", dir_path,
    ", found ", length(data_file), ".",
    call. = FALSE
  )
}
data_1 <- read.csv(data_file, header = TRUE, stringsAsFactors = FALSE)

# Keep at most the first 1000 rows. Unlike data_1[1:1000, ], head() does not
# pad the result with all-NA rows when the file is shorter than 1000 rows.
data_1 <- head(data_1, 1000)
#dim(data_1) #[1] 29646     4
#View(data_1)
# head(data_1)
# dim(data_1) #[1] 244  12
# length(unique(c(data_1$compound_1, data_1$compound_2))) #244
# length(unique(c(data_1$compound_1))) #[1] 243
# data_11 <- data_1
# colnames(data_11)[c(1, 2)] <- colnames(data_11)[c(2,1)]
# data_1 <- unique(rbind(data_1, data_11))
# length(unique(c(data_1$compound_1))) #[1] 243
# length(unique(data_1$compound_2))
########################################
##################################################2___HM-P-VALUE
#colnames(data_1)
head(data_1)
##       compound_1     compound_2       p.value        cc number_assay
## 1 VFCNQNZNPKRXIT OPQNCARIZFLNLF  8.284198e-04 0.2649935          156
## 2 VFCNQNZNPKRXIT NISPVUDLMHQFRQ  1.807323e-16 0.5783376          169
## 3 VFCNQNZNPKRXIT LLEJIEBFSOEYIV  1.395366e-15 0.4432776          294
## 4 VFCNQNZNPKRXIT NMTNUQBORQILRK  1.871712e-06 0.3731908          154
## 5 VFCNQNZNPKRXIT KYRVNWMVYQXFEU  2.142687e-57 0.3700334         1734
## 6 VFCNQNZNPKRXIT RJMUSRYZPJIFPJ 1.739109e-117 0.5058473         1799
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3

# Drop rows with an NA in any column before labelling, so that the counts
# printed below describe exactly the rows that are plotted afterwards.
data_1 <- na.omit(data_1)

# Label every row by the sign of its correlation coefficient `cc`
# (a value of exactly 0 is counted as "Positive").
data_1$no <- ifelse(data_1$cc < 0, "Negative", "Positive")
unique(data_1$no)
## [1] "Positive" "Negative"
table(data_1$no)
## 
## Negative Positive 
##      113      887

# Exploratory view: one density-scaled histogram and one density curve per
# sign category, drawn on top of each other (position = "identity").
ggplot(data_1, aes(cc, fill = no)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "gray50",
    alpha = 0.2,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(alpha = 0.2)

# Observed range of cc.
min(data_1$cc)
## [1] -0.4716621
max(data_1$cc)
## [1] 0.9444525
#######################################################################
# Same idea with finer bins; `fill` is mapped only in the histogram layer, so
# the density layer draws a single curve over all rows.
ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray50",
    alpha = 0.6,
    binwidth = 0.02,
    position = "identity"
  ) +
  geom_density(alpha = 1)

head(data_1)
##       compound_1     compound_2       p.value        cc number_assay       no
## 1 VFCNQNZNPKRXIT OPQNCARIZFLNLF  8.284198e-04 0.2649935          156 Positive
## 2 VFCNQNZNPKRXIT NISPVUDLMHQFRQ  1.807323e-16 0.5783376          169 Positive
## 3 VFCNQNZNPKRXIT LLEJIEBFSOEYIV  1.395366e-15 0.4432776          294 Positive
## 4 VFCNQNZNPKRXIT NMTNUQBORQILRK  1.871712e-06 0.3731908          154 Positive
## 5 VFCNQNZNPKRXIT KYRVNWMVYQXFEU  2.142687e-57 0.3700334         1734 Positive
## 6 VFCNQNZNPKRXIT RJMUSRYZPJIFPJ 1.739109e-117 0.5058473         1799 Positive
# Styled draft of the figure: per-category histograms of cc on a density
# scale, with a single dashed density curve over all rows.
ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray90",
    alpha = 0.7,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(
    alpha = 0.1,
    color = "darkred",
    linewidth = 0.6,  # `size` is deprecated for lines since ggplot2 3.4.0
    linetype = "dashed"
  ) +
  # The unnamed colours are assigned to the fill categories in scale order.
  scale_fill_manual(values = c("#BDEEC4", "#FFB3B3")) +
  labs(
    x = "Correlation Coefficient",  # x-axis title
    y = "Density",                  # y-axis title
    fill = "Category"               # legend title
  ) +
  scale_x_continuous(
    limits = c(-.8, 1.1),           # x-axis range
    breaks = seq(-.8, 1.1, 0.4),    # x-axis tick interval
    expand = c(0, 0)                # no padding beyond the limits
  ) +
  scale_y_continuous(
    limits = c(0, 6),               # y-axis range
    breaks = seq(0, 6, 1),          # y-axis tick interval
    expand = c(0, 0)                # no padding beyond the limits
  ) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.title = element_text(size = 15, face = "bold"),  # legend title font
    legend.text = element_text(size = 15),                  # legend label font
    legend.background = element_rect(fill = "white"),       # legend background
    legend.key.size = unit(0.5, "cm"),                      # legend key size
    legend.margin = margin(t = 5, b = 5, l = 5, r = 5),     # legend margin
    legend.box.spacing = unit(0.5, "cm"),                   # gap between legend and panel
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15)
  )
# NOTE(review): the warning below is presumably caused by histogram bars that
# fall outside the fixed axis limits -- confirm before relying on the figure.
## Warning: Removed 4 rows containing missing values (`geom_bar()`).

####################################################
# Final figure: per-category histograms of cc on a density scale plus one
# dashed density curve over all rows. The curve gets its own legend entry
# ("Overall") by mapping a constant to the linetype aesthetic.
cc_density_plot <- ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray90",
    alpha = 0.7,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(
    aes(linetype = "Overall"),
    alpha = 0.1,
    color = "darkred",
    linewidth = 0.6  # `size` is deprecated for lines since ggplot2 3.4.0
  ) +
  # The unnamed colours are assigned to the fill categories in scale order.
  scale_fill_manual(values = c("#BDEEC4", "#FFB3B3")) +
  scale_linetype_manual(values = c("Overall" = "dashed")) +
  guides(
    fill = guide_legend(
      title = "Category",
      order = 1,
      override.aes = list(color = "gray90")
    ),
    linetype = guide_legend(
      title = NULL,
      order = 2,
      keywidth = unit(2.5, "lines"),  # length of the line sample in the key
      label.hjust = 0,                # left-align the label text
      label.position = "right"
    )
  ) +
  labs(
    x = "Correlation Coefficient",
    y = "Density"
  ) +
  scale_x_continuous(
    limits = c(-.8, 1.1),
    breaks = seq(-.8, 1.1, 0.4),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 6),
    breaks = seq(0, 6, 1),
    expand = c(0, 0)
  ) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.title = element_text(size = 15, face = "bold"),
    legend.text = element_text(size = 15),
    legend.background = element_rect(fill = "white"),
    legend.key.size = unit(0.5, "cm"),
    legend.margin = margin(t = 5, b = 5, l = 5, r = 5),
    legend.box.spacing = unit(0.5, "cm"),
    legend.spacing.x = unit(0.2, "cm"),  # horizontal gap between legend entries
    legend.box = "horizontal",
    axis.text = element_text(size = 15, color = "black"),
    axis.title = element_text(size = 15, color = "black")
  )

# Display the figure.
cc_density_plot
## Warning: Removed 4 rows containing missing values (`geom_bar()`).

# Write the figure as a date-stamped TIFF into dir_path. The plot object is
# passed explicitly so the saved file cannot silently become some other plot
# that happened to be created last.
ggsave(
  filename = paste0(Sys.Date(), "-data.tif"),
  plot = cc_density_plot,
  device = "tiff",
  path = dir_path,
  width = 14,
  height = 11,
  units = "cm",
  dpi = 300,
  limitsize = TRUE
)
## Warning: Removed 4 rows containing missing values (`geom_bar()`).