# Load the required packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
# Switch to the project data folder so the workbook can be opened by file name.
setwd("F:/PAPER_2024/HAMTHUANDAMI/data")
# Read the first sheet of "dulieu.xlsx".
# The data columns are: OTC, HTR, Species, BA.
df <- openxlsx::read.xlsx(xlsxFile = "dulieu.xlsx", sheet = 1)
# Preview the first six rows of the imported table.
head(df, n = 6L)
##   OTC HTR            Species          BA
## 1   1  HG L. pseudosundaicus 0.076464968
## 2   1  HG L. pseudosundaicus 0.096337580
## 3   1  HG L. pseudosundaicus 0.016122611
## 4   1  HG L. pseudosundaicus 0.015414013
## 5   1  HG      N. melliferum 0.007165605
## 6   1  HG      N. melliferum 0.014044586
#vẽ lại biểu đồ tương đồng

library(UpSetR)
## Warning: package 'UpSetR' was built under R version 4.3.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Đọc dữ liệu

# Step 1: drop duplicates so each species appears once per forest state (HTR).
df_unique <- distinct(df, HTR, Species)

# Step 2: build the species x forest-state presence/absence matrix.
# Rows are species, columns are HTR values; any positive count becomes 1.
mat <- table(df_unique[["Species"]], df_unique[["HTR"]])
mat_bin <- ifelse(mat > 0, 1, 0)

# Step 3: draw the UpSet plot.
# The 0/1 matrix is converted to a data frame with one column per forest state;
# those column names are passed as the sets, and keep.order = TRUE keeps them
# in that order while intersections are sorted by frequency.
p <- upset(
  as.data.frame(as.matrix(mat_bin)),
  sets = colnames(mat_bin),
  keep.order = TRUE,
  order.by = "freq",
  sets.x.label = "Số loài theo trạng thái",
  mainbar.y.label = "Số loài giao nhau"
)
print(p)

#Bước 4: Vẽ sơ đồ Venn
library(VennDiagram)
## Warning: package 'VennDiagram' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: futile.logger
## Warning: package 'futile.logger' was built under R version 4.3.3
# Build a named list with one vector of species per forest state (HTR).
sp_list <- split(df_unique$Species, df_unique$HTR)

# Build the Venn diagram for the three sets.
# filename = NULL makes venn.diagram() return the grid objects instead of
# writing an image file. Three fill colours are supplied, so sp_list is
# expected to contain exactly three groups.
venn.plot <- venn.diagram(
  sp_list,
  filename = NULL,
  fill = c("skyblue", "pink", "lightgreen"),
  alpha = 0.6,
  cex = 1.5,
  cat.cex = 1.5,
  cat.pos = 0
)

# Start a fresh page before drawing: grid.draw() only adds to the current
# page, so without this the Venn diagram would be painted on top of whatever
# was drawn last (here, the UpSet plot).
grid::grid.newpage()
grid::grid.draw(venn.plot)