################################################
###############################input data
# Folder that is searched (recursively) for the input CSV.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\"
# Anchor the pattern so only files whose name ends in ".csv" are listed.
dir_path_name <- list.files(
  path = dir_path,
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
dir_path_name
## [1] "C:\\Users\\liyix\\OneDrive\\Desktop\\data\\data.csv"
# Select the input by exact file name rather than by a regex over the whole
# path, and fail with a clear message unless exactly one file qualifies.
data_csv_path <- dir_path_name[basename(dir_path_name) == "data.csv"]
if (length(data_csv_path) != 1L) {
  stop(
    "Expected exactly one 'data.csv' under ", dir_path,
    " but found ", length(data_csv_path), ".",
    call. = FALSE
  )
}
data_1 <- read.csv(data_csv_path, header = TRUE, stringsAsFactors = FALSE)
# Keep at most the first 1000 rows; head() never pads a shorter table with
# all-NA rows the way indexing with 1:1000 does.
data_1 <- head(data_1, 1000L)
#dim(data_1) #[1] 29646 4
#View(data_1)
# head(data_1)
# dim(data_1) #[1] 244 12
# length(unique(c(data_1$compound_1, data_1$compound_2))) #244
# length(unique(c(data_1$compound_1))) #[1] 243
# data_11 <- data_1
# colnames(data_11)[c(1, 2)] <- colnames(data_11)[c(2,1)]
# data_1 <- unique(rbind(data_1, data_11))
# length(unique(c(data_1$compound_1))) #[1] 243
# length(unique(data_1$compound_2))
########################################
##################################################2___HM-P-VALUE
#colnames(data_1)
# Preview the first rows of the loaded table.
head(data_1)
## compound_1 compound_2 p.value cc number_assay
## 1 VFCNQNZNPKRXIT OPQNCARIZFLNLF 8.284198e-04 0.2649935 156
## 2 VFCNQNZNPKRXIT NISPVUDLMHQFRQ 1.807323e-16 0.5783376 169
## 3 VFCNQNZNPKRXIT LLEJIEBFSOEYIV 1.395366e-15 0.4432776 294
## 4 VFCNQNZNPKRXIT NMTNUQBORQILRK 1.871712e-06 0.3731908 154
## 5 VFCNQNZNPKRXIT KYRVNWMVYQXFEU 2.142687e-57 0.3700334 1734
## 6 VFCNQNZNPKRXIT RJMUSRYZPJIFPJ 1.739109e-117 0.5058473 1799
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Label every row by the sign of its correlation coefficient; zero counts as
# positive. A missing `cc` yields an NA label instead of a placeholder string,
# so it is neither tabulated below nor kept after na.omit().
data_1$no <- ifelse(data_1$cc < 0, "Negative", "Positive")
unique(data_1$no)
## [1] "Positive" "Negative"
table(data_1$no)
##
## Negative Positive
## 113 887
# Drop every row that has an NA in any column.
data_1 <- na.omit(data_1)
# Overlaid density-scaled histograms of `cc`, one per category, each with its
# own kernel density curve: `fill = no` is mapped at the plot level, so both
# layers are split by category.
# after_stat(density) replaces the `..density..` notation deprecated in
# ggplot2 3.4.0.
ggplot(data_1, aes(cc, fill = no)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "gray50",
    alpha = 0.2,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(alpha = 0.2)

# Smallest and largest observed correlation coefficient.
min(data_1[["cc"]])
## [1] -0.4716621
max(data_1[["cc"]])
## [1] 0.9444525
#######################################################################
# Finer-binned variant: the histogram is still split by category, but `fill`
# is mapped only inside the histogram layer, so the density curve is a single
# one computed over all rows.
# after_stat(density) replaces the `..density..` notation deprecated in
# ggplot2 3.4.0.
ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray50",
    alpha = 0.6,
    binwidth = 0.02,
    position = "identity"
  ) +
  geom_density(alpha = 1)

# Show the first six rows again, this time including the `no` category column.
# The lines below are console output captured from an earlier run.
head(data_1, n = 6L)
## compound_1 compound_2 p.value cc number_assay no
## 1 VFCNQNZNPKRXIT OPQNCARIZFLNLF 8.284198e-04 0.2649935 156 Postive
## 2 VFCNQNZNPKRXIT NISPVUDLMHQFRQ 1.807323e-16 0.5783376 169 Postive
## 3 VFCNQNZNPKRXIT LLEJIEBFSOEYIV 1.395366e-15 0.4432776 294 Postive
## 4 VFCNQNZNPKRXIT NMTNUQBORQILRK 1.871712e-06 0.3731908 154 Postive
## 5 VFCNQNZNPKRXIT KYRVNWMVYQXFEU 2.142687e-57 0.3700334 1734 Postive
## 6 VFCNQNZNPKRXIT RJMUSRYZPJIFPJ 1.739109e-117 0.5058473 1799 Postive
# Styled figure: per-category density histograms of `cc` plus one dashed
# density curve over all rows, with fixed axis ranges and the legend on top.
# Uses after_stat(density) and `linewidth`, the replacements for the
# `..density..` notation and the `size` line aesthetic deprecated in
# ggplot2 3.4.0.
ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray90",
    alpha = 0.7,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(
    alpha = 0.1,
    color = "darkred",
    linewidth = 0.6,
    linetype = "dashed"
  ) +
  # Unnamed values are assigned to the categories in the order of the scale's
  # limits (sorted category names).
  scale_fill_manual(values = c("#BDEEC4", "#FFB3B3")) +
  labs(
    x = "Correlation Coefficient", # x-axis title
    y = "Density", # y-axis title
    fill = "Category" # legend title
  ) +
  scale_x_continuous(
    limits = c(-0.8, 1.1), # x-axis range
    breaks = seq(-0.8, 1.1, 0.4), # x-axis tick interval
    expand = c(0, 0) # no padding beyond the limits
  ) +
  scale_y_continuous(
    limits = c(0, 6), # y-axis range
    breaks = seq(0, 6, 1), # y-axis tick interval
    expand = c(0, 0) # no padding beyond the limits
  ) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.title = element_text(size = 15, face = "bold"), # legend title font
    legend.text = element_text(size = 15), # legend label font
    legend.background = element_rect(fill = "white"), # legend background
    legend.key.size = unit(0.5, "cm"), # legend key size
    legend.margin = margin(t = 5, b = 5, l = 5, r = 5), # legend margin
    legend.box.spacing = unit(0.5, "cm"), # gap between legend and panel
    axis.text = element_text(size = 15),
    axis.title = element_text(size = 15)
  )
# NOTE(review): the warning below presumably comes from empty edge bins that
# overhang the x limits and are therefore censored by the scale - confirm.
## Warning: Removed 4 rows containing missing values (`geom_bar()`).

####################################################
# Final figure: same layers as the styled histogram, but the density curve's
# linetype is mapped to the constant "Overall" so the curve gets its own
# legend entry next to the category legend.
# Uses after_stat(density) and `linewidth`, the replacements for the
# `..density..` notation and the `size` line aesthetic deprecated in
# ggplot2 3.4.0.
ggplot(data_1, aes(cc)) +
  geom_histogram(
    aes(y = after_stat(density), fill = no),
    color = "gray90",
    alpha = 0.7,
    binwidth = 0.05,
    position = "identity"
  ) +
  geom_density(
    aes(linetype = "Overall"),
    alpha = 0.1,
    color = "darkred",
    linewidth = 0.6
  ) +
  # Unnamed values are assigned to the categories in the order of the scale's
  # limits (sorted category names).
  scale_fill_manual(values = c("#BDEEC4", "#FFB3B3")) +
  scale_linetype_manual(values = c("Overall" = "dashed")) +
  guides(
    fill = guide_legend(
      title = "Category",
      order = 1,
      override.aes = list(color = "gray90")
    ),
    linetype = guide_legend(
      title = NULL,
      order = 2,
      keywidth = unit(2.5, "lines"), # length of the sample line in the key
      label.hjust = 0, # left-align the label text
      label.position = "right"
    )
  ) +
  labs(
    x = "Correlation Coefficient",
    y = "Density"
  ) +
  scale_x_continuous(
    limits = c(-0.8, 1.1),
    breaks = seq(-0.8, 1.1, 0.4),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    limits = c(0, 6),
    breaks = seq(0, 6, 1),
    expand = c(0, 0)
  ) +
  theme_classic() +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.title = element_text(size = 15, face = "bold"),
    legend.text = element_text(size = 15),
    legend.background = element_rect(fill = "white"),
    legend.key.size = unit(0.5, "cm"),
    legend.margin = margin(t = 5, b = 5, l = 5, r = 5),
    legend.box.spacing = unit(0.5, "cm"),
    legend.spacing.x = unit(0.2, "cm"), # horizontal spacing inside the legends
    legend.box = "horizontal",
    axis.text = element_text(size = 15, color = "black"),
    axis.title = element_text(size = 15, color = "black")
  )
# NOTE(review): the warning below presumably comes from empty edge bins that
# overhang the x limits and are therefore censored by the scale - confirm.
## Warning: Removed 4 rows containing missing values (`geom_bar()`).

# Write the most recently displayed plot to a date-stamped TIFF inside
# `dir_path`, sized 14 cm x 11 cm at 300 dpi.
ggsave(
  filename = paste0(format(Sys.Date(), "%Y-%m-%d"), "-data.tif"),
  plot = last_plot(),
  device = "tiff",
  path = dir_path,
  width = 14,
  height = 11,
  units = "cm",
  dpi = 300,
  limitsize = TRUE
)
## Warning: Removed 4 rows containing missing values (`geom_bar()`).