#install.packages("remotes")
#Sys.setenv("TAR" = "internal")
#remotes::install_github("yikeshu0611/oncomineR")
# Load the packages used by this script.
# The workspace is deliberately NOT cleared here: rm(list = ls()) would delete
# every object in the caller's global environment as a side effect of running
# the script. Start a fresh R session instead when a clean state is needed.
# Loading ggplot2 may print an informational "Registered S3 methods overwritten
# by 'tibble'" message (format.tbl / print.tbl from pillar).
library(ggplot2)
library(oncomineR)
library(ggprism)
# Input data ----
# Folder that holds the Oncomine exports saved as .mhtml files.
dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"

# Remove the trailing part number ("_1", "_12", ...) that sits directly in
# front of the ".mhtml" extension, so that all parts of one dataset end up
# with the same name. Only that trailing suffix is removed; an "_<digit>"
# elsewhere in the name is kept, and multi-digit part numbers are removed
# completely.
strip_part_suffix <- function(file_names) {
  sub("_[[:digit:]]+( ?\\.mhtml)$", "\\1", file_names)
}

# File names only (no directory part); the pattern is anchored so that only
# files whose name ends in ".mhtml" are listed.
dir_path_name <- list.files(
  path = dir_path, pattern = "\\.mhtml$",
  full.names = FALSE, recursive = FALSE
)
dir_path_name
## e.g. "Acute Myeloid Leukemia_1.mhtml" "Acute Myeloid Leukemia_2.mhtml"
##      "Acute Myeloid Leukemia_3.mhtml"
length(dir_path_name) # number of .mhtml files found

# One entry per dataset: part suffix removed, then any space left in front of
# a dot removed.
dir_path_name_1 <- unique(strip_part_suffix(dir_path_name))
dir_path_name_2 <- gsub(" \\.", ".", dir_path_name_1)
dir_path_name_2
## e.g. "Acute Myeloid Leukemia.mhtml"

# Dataset stems without the extension; these drive the processing loop.
dir_path_name_3 <- unique(sub("\\.mhtml$", "", dir_path_name_2))
length(unique(dir_path_name_1)) # number of distinct datasets
library(oncomineR)
# Per-dataset processing ----
# For each dataset stem in `dir_path_name_3`: read every matching .mhtml part
# with oncomine(), merge and de-duplicate the rows, export the data and the
# per-group sample counts as CSV, draw a box plot annotated with the p-value
# of t.test() (default settings), and save the figure as an LZW-compressed
# TIFF. One p-value row per dataset is collected in `data_pvalue`.

# Output directory. Defined before the loop so that it exists even when the
# loop body never runs; it is used again after the loop.
dir_out <- "C:\\Users\\liyix\\OneDrive\\Desktop\\2021\\2021_HTS_ACHE\\data_plot_down\\"

# Loop-invariant listing of all .mhtml exports (full paths and bare names).
mhtml_paths <- list.files(
  path = dir_path, pattern = "\\.mhtml$",
  full.names = TRUE, recursive = FALSE
)
mhtml_names <- basename(mhtml_paths)

# `Legend Value` groups that are removed before plotting and testing.
excluded_groups <- c(
  "CD34-Positive Peripheral Blood Cell",
  "Rectum",
  "Ascending Colon",
  "Descending Colon",
  "Transverse Colon"
)

data_pvalue <- vector("list", length(dir_path_name_3))
for (i in seq_along(dir_path_name_3)) {
  print(i)
  stem <- dir_path_name_3[i]

  # Files that belong to this dataset: the name must start with the stem and
  # the remainder may only be an optional "_<number>" part suffix plus the
  # extension. The stem is compared literally (not as a regular expression),
  # and a longer dataset name that merely starts with this stem is not
  # picked up.
  suffix <- substring(mhtml_names, nchar(stem) + 1L)
  is_part <- startsWith(mhtml_names, stem) &
    grepl("^ ?(_[[:digit:]]+)? ?\\.mhtml$", suffix)
  name <- mhtml_paths[is_part]
  if (length(name) == 0L) {
    warning("No .mhtml files found for dataset '", stem, "'; skipping.",
            call. = FALSE)
    next
  }

  # Read all parts, stack them and drop duplicated rows.
  data_list <- lapply(name, oncomine)
  data_2 <- do.call("rbind", data_list)
  data_3 <- unique(data_2)

  # Export the merged data used for the plot.
  write.csv(
    data_3,
    paste0(dir_out, Sys.Date(), "-", stem, "_data_for_plot.csv"),
    row.names = FALSE
  )

  # Number of distinct sample names per `Legend Value` group: cross-tabulate,
  # keep the non-empty cells, then count the remaining cells per group.
  n_stat <- data.frame(table(data_3$`Legend Value`, data_3$`Sample Name`))
  n_stat <- n_stat[n_stat$Freq != 0, ]
  write.csv(
    data.frame(table(n_stat$Var1)),
    paste0(dir_out, Sys.Date(), "-", stem, "_data_for_plot_n.csv"),
    row.names = FALSE
  )

  # Plot ----
  data_3$`Expression value` <- as.numeric(data_3$`Expression value`)
  # %in% never yields NA, so a missing `Legend Value` cannot turn into an
  # all-NA row the way chained `!=` comparisons would.
  data_3 <- data_3[!(data_3$`Legend Value` %in% excluded_groups), ]

  # The manual fill palette provides two colours, i.e. the plot is laid out
  # for at most two `Legend Value` groups.
  p1 <- ggplot(data_3, aes(x = factor(`Legend Value`), y = `Expression value`)) +
    geom_boxplot(aes(fill = factor(`Legend Value`))) +
    scale_colour_prism() +
    theme_prism(base_size = 15) +
    theme(
      legend.position = "none",
      axis.title.x = element_blank(),
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
    ) +
    scale_fill_manual(values = c("#00AFBB", "#E7B800"))

  # P-value ----
  # NOTE(review): the two groups are taken from `Normal Tissue Type`, while
  # the x axis uses `Legend Value` - confirm that both columns carry the same
  # labels, otherwise the annotation will not line up with the boxes.
  cate <- unique(data_3$`Normal Tissue Type`)
  cate_1 <- cate[1]
  cate_2 <- cate[2]
  data_p1 <- data_3[data_3$`Normal Tissue Type` == cate_1, ]$`Expression value`
  data_p2 <- data_3[data_3$`Normal Tissue Type` == cate_2, ]$`Expression value`
  p_value <- t.test(data_p1, data_p2)$p.value
  df_p_val <- data.frame(
    group1 = cate_1,
    group2 = cate_2,
    p.adj = paste0("P = ", round(p_value, digits = 2)),
    # na.rm: as.numeric() above may have produced NA, which would otherwise
    # make the annotation height NA.
    y.position = max(data_3$`Expression value`, na.rm = TRUE) + 0.5
  )
  # NOTE(review): the row is stored before the label is rewritten below, so
  # the exported p-value table keeps the rounded text (possibly "P = 0") -
  # confirm whether that is intended.
  data_pvalue[[i]] <- df_p_val
  # A p-value that rounds to 0 at two digits is shown as "P < 0.01".
  if (df_p_val$p.adj == "P = 0") {
    df_p_val$p.adj <- "P < 0.01"
  }

  # Add the p-value annotation and save exactly this plot object instead of
  # relying on ggplot2's global "last plot" state.
  p2 <- p1 + add_pvalue(df_p_val)
  ggsave(
    filename = paste0(Sys.Date(), "-", stem, ".tif"),
    plot = p2, device = "tiff", path = dir_out,
    width = 17, height = 17, units = "cm",
    dpi = 300, limitsize = TRUE, compression = "lzw"
  )
  print(p2)
}
## [1] 1

# Stack the per-dataset p-value rows into one table, drop the y.position
# column (it is only needed to place the annotation in the plot), and write
# the table to the output directory.
data_p <- do.call(rbind, data_pvalue)
data_p$y.position <- NULL
pvalue_csv <- paste0(dir_out, Sys.Date(), "_data_pvalue.csv")
write.csv(data_p, file = pvalue_csv, row.names = FALSE)
######################################################plot