#install.packages("remotes")
#Sys.setenv("TAR" = "internal")
#remotes::install_github("yikeshu0611/oncomineR")
# Load the oncomineR package (and the plotting packages used below)
rm(list = ls()) 
library(ggplot2)
## Registered S3 methods overwritten by 'tibble':
##   method     from  
##   format.tbl pillar
##   print.tbl  pillar
library(oncomineR) 
library(ggprism)
# Input data ----
# Discover the exported Oncomine .mhtml files and derive one group name per
# cancer type. A cancer type may be split over several numbered exports, e.g.
#   "Acute Myeloid Leukemia_1.mhtml", "Acute Myeloid Leukemia_2.mhtml", ...
# which all collapse to the group name "Acute Myeloid Leukemia".

# Remove the trailing "_<n>" part counter that sits directly before the
# ".mhtml" extension. The counter may have several digits ("_10"), and only
# the trailing counter is removed, so names such as "Type_2 X_3.mhtml" keep
# their inner "_2".
strip_part_suffix <- function(file_names) {
  sub("_[[:digit:]]+(\\.mhtml)$", "\\1", file_names)
}

dir_path <- "C:\\Users\\liyix\\OneDrive\\Desktop\\"
# Anchor the pattern on the extension so that only real *.mhtml files match.
dir_path_name <- list.files(
  path = dir_path, pattern = "\\.mhtml$",
  full.names = FALSE, recursive = FALSE
)
dir_path_name
length(dir_path_name)

# One entry per cancer type once the part counter is gone.
dir_path_name_1 <- unique(strip_part_suffix(dir_path_name))
# "Name _1.mhtml" leaves "Name .mhtml" behind; drop the blank before the dot.
dir_path_name_2 <- gsub(" \\.", ".", dir_path_name_1)
dir_path_name_2
# Group names without the (escaped, anchored) file extension.
dir_path_name_3 <- unique(sub("\\.mhtml$", "", dir_path_name_2))
length(unique(dir_path_name_1))
library(oncomineR) 
# Per-cancer processing ----
# For every group name in dir_path_name_3: read all matching .mhtml exports,
# save the de-duplicated table and per-group sample counts as CSV, draw a
# box plot, annotate it with a t-test p-value and save the figure as TIFF.

# One p-value row per cancer type; combined and written after the loop.
data_pvalue <- list()

# Output folder for the CSV tables and TIFF figures. Defined before the loop
# so that it exists even when the loop body never runs (it is used again
# after the loop).
dir_out <- "C:\\Users\\liyix\\OneDrive\\Desktop\\2021\\2021_HTS_ACHE\\data_plot_down\\"

# The input folder does not change while looping, so list it only once.
mhtml_files <- list.files(
  path = dir_path, pattern = "\\.mhtml$",
  full.names = TRUE, recursive = FALSE
)

# Legend values that are removed before plotting and testing.
excluded_groups <- c(
  "CD34-Positive Peripheral Blood Cell",
  "Rectum",
  "Ascending Colon",
  "Descending Colon",
  "Transverse Colon"
)

# seq_along() yields an empty sequence for empty input, whereas 1:length(x)
# would iterate over 1 and 0.
for (i in seq_along(dir_path_name_3)) {
  print(i)

  # Match the group name literally (fixed = TRUE) against the file names only,
  # so that regex metacharacters in a name or text in the directory path
  # cannot produce wrong matches.
  name <- mhtml_files[
    grepl(dir_path_name_3[i], basename(mhtml_files), fixed = TRUE)
  ]
  if (length(name) == 0) {
    warning(
      "No .mhtml file found for '", dir_path_name_3[i], "'; skipping.",
      call. = FALSE
    )
    next
  }

  # Parse every export of this cancer type and stack the tables.
  data_list <- lapply(name, oncomine)
  data_2 <- do.call("rbind", data_list)
  # Separate exports can contain the same rows; keep each row once.
  data_3 <- unique(data_2)

  write.csv(
    data_3,
    paste0(dir_out, Sys.Date(), "-", dir_path_name_3[i], "_data_for_plot.csv"),
    row.names = FALSE
  )

  # Number of distinct samples per legend value: cross-tabulate legend value
  # by sample name, keep the combinations that occur, then count them per
  # legend value.
  n_stat <- data.frame(table(data_3$`Legend Value`, data_3$`Sample Name`))
  n_stat <- n_stat[n_stat$Freq != 0, ]
  write.csv(
    data.frame(table(n_stat$Var1)),
    paste0(dir_out, Sys.Date(), "-", dir_path_name_3[i], "_data_for_plot_n.csv"),
    row.names = FALSE
  )

  # Plot ----
  data_3$`Expression value` <- as.numeric(data_3$`Expression value`)
  # %in% never returns NA, so rows with a missing legend value are kept
  # unchanged instead of being turned into all-NA rows.
  data_3 <- data_3[!(data_3$`Legend Value` %in% excluded_groups), ]

  # Box plot of expression per legend value. Only two fill colours are
  # supplied, so the plot supports at most two remaining groups.
  p1 <- ggplot(data_3, aes(x = factor(`Legend Value`), y = `Expression value`)) +
    geom_boxplot(aes(fill = factor(`Legend Value`))) +
    scale_colour_prism() +
    theme_prism(base_size = 15) +
    theme(legend.position = "none",
          axis.title.x = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    scale_fill_manual(values = c("#00AFBB", "#E7B800"))

  # P-value annotation ----
  # NOTE(review): the compared groups come from `Normal Tissue Type`, while the
  # x axis uses `Legend Value`; the bracket presumably needs group labels that
  # exist on the x axis -- confirm both columns carry the same labels.
  tissue_types <- unique(data_3$`Normal Tissue Type`)
  cate_1 <- tissue_types[1]
  cate_2 <- tissue_types[2]
  data_p1 <- data_3[data_3$`Normal Tissue Type` == cate_1, ]$`Expression value`
  data_p2 <- data_3[data_3$`Normal Tissue Type` == cate_2, ]$`Expression value`
  p_value <- t.test(data_p1, data_p2)$p.value
  df_p_val <- data.frame(
    group1 = cate_1,
    group2 = cate_2,
    p.adj = paste0("P = ", round(p_value, digits = 2)),
    # na.rm: as.numeric() above may have produced NA for unparsable values.
    y.position = max(data_3$`Expression value`, na.rm = TRUE) + 0.5
  )
  # The row is stored with the rounded label as computed; the "P < 0.01"
  # substitution below only affects the label drawn on the figure.
  data_pvalue[[i]] <- df_p_val
  # A p-value that rounds to 0 at two digits is shown as "P < 0.01".
  if (df_p_val$p.adj == "P = 0") {
    df_p_val$p.adj <- "P < 0.01"
  }

  p2 <- p1 + add_pvalue(df_p_val)

  # Save the annotated plot explicitly instead of relying on last_plot().
  ggsave(filename = paste0(Sys.Date(), "-", dir_path_name_3[i], ".tif"),
         plot = p2, device = "tiff", path = dir_out,
         width = 17, height = 17, units = "cm",
         dpi = 300, limitsize = TRUE, compression = "lzw")
  print(p2)
}
## [1] 1

# Stack the per-cancer p-value rows into one table, drop the y.position column
# (only needed for placing the label on the plot) and save the table in the
# same output folder as the other results.
pvalue_csv <- paste0(dir_out, Sys.Date(), "_data_pvalue.csv")
data_p <- do.call(rbind, data_pvalue)
data_p[["y.position"]] <- NULL
write.csv(data_p, file = pvalue_csv, row.names = FALSE)
######################################################plot