options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))

install.packages("phyloseq")
## Warning: package 'phyloseq' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
install.packages("vegan")
## package 'vegan' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\86183\AppData\Local\Temp\RtmpAzqsdi\downloaded_packages
install.packages("ggplot2")
## package 'ggplot2' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\86183\AppData\Local\Temp\RtmpAzqsdi\downloaded_packages
library(phyloseq)
library(vegan)
## Warning: package 'vegan' was built under R version 4.4.3
## Loading required package: permute
## Loading required package: lattice
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# 输入OTU表格(分组数据)
otu_data <- read.csv("C:/data1/group_modified-sum.csv", header = TRUE, row.names = 1)

# 提取OTU丰度数据
otu_matrix <- otu_data[, 1:4]  # 假设前四列是样本丰度
colnames(otu_matrix) <- c("BFO", "BFOH", "RFO", "RFOH")  # 样本名称

# 提取分类信息
tax_data <- otu_data[, 5]
tax_strings <- as.character(tax_data)  # 确保转换为字符类型
tax_matrix <- matrix(NA, nrow = length(tax_strings), ncol = 7)
colnames(tax_matrix) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")

for (i in 1:length(tax_strings)) {
  # 按逗号拆分,并去除空白字符
  split_tax <- trimws(unlist(strsplit(tax_strings[i], ",")))
  
  # 提取每个层级的名称(去掉前缀如 "k:")
  for (j in 1:length(split_tax)) {
    level <- unlist(strsplit(split_tax[j], ":"))[1]  # 如 "k", "p", "c"...
    value <- unlist(strsplit(split_tax[j], ":"))[2]  # 如 "Bacteria", "Proteobacteria"...
    
    # 根据前缀匹配到对应的列
    if (level == "k") tax_matrix[i, "Kingdom"] <- value
    if (level == "p") tax_matrix[i, "Phylum"] <- value
    if (level == "c") tax_matrix[i, "Class"] <- value
    if (level == "o") tax_matrix[i, "Order"] <- value
    if (level == "f") tax_matrix[i, "Family"] <- value
    if (level == "g") tax_matrix[i, "Genus"] <- value
    if (level == "s") tax_matrix[i, "Species"] <- value
  }
}

# 设置行名为OTU名称
rownames(tax_matrix) <- rownames(otu_data)

# 检查结果
head(tax_matrix[, "Phylum"])
##                 OTU_10000                 OTU_10001                 OTU_10002 
##          "Proteobacteria" "unclassified_k_Bacteria"            "Bacteroidota" 
##                 OTU_10003                 OTU_10004                  OTU_1000 
## "unclassified_k_Bacteria" "unclassified_k_Bacteria" "unclassified_k_Bacteria"
# 处理部分未分类结果
tax_matrix[tax_matrix == "unclassified_k_Bacteria"] <- "unclassified"
#tax_matrix[tax_matrix == "unclassified_p_Proteobacteria"] <- "unclassified"

# 转换为phyloseq对象
TAX <- tax_table(tax_matrix)
OTU <- otu_table(otu_matrix, taxa_are_rows = TRUE)
physeq <- phyloseq(OTU, TAX)


###########################
# community composition
# 数据预处理:合并相同门的OTU
phylum_physeq <- tax_glom(physeq, taxrank = "Phylum", NArm = FALSE)

# 转换为相对丰度(百分比)
phylum_physeq_rel <- transform_sample_counts(phylum_physeq, function(x) x/sum(x)*100)

# 提取数据用于绘图
phylum_data <- psmelt(phylum_physeq_rel)  # 转换为长格式数据框

# 筛选丰度较高的门(例如>1%),其余归为"Other"
top_phyla <- phylum_data %>%
  group_by(Phylum) %>%
  summarize(mean_abund = mean(Abundance)) %>%
  arrange(desc(mean_abund)) %>%
  filter(mean_abund >= 1) %>%  # 可调整此阈值
  pull(Phylum)

phylum_data$Phylum[!(phylum_data$Phylum %in% top_phyla)] <- "Other"

# 确保 Phylum 是因子类型
phylum_data$Phylum <- as.factor(phylum_data$Phylum)

# 获取所有非 Other/unclassified 的类群
other_phyla <- setdiff(levels(phylum_data$Phylum), c("Other", "unclassified"))

# 重新设置因子顺序:Other 和 unclassified 在图例最底部
phylum_data$Phylum <- factor(phylum_data$Phylum,
                             levels = c(other_phyla, "Other", "unclassified"))

# 画堆积柱形图
ggplot(phylum_data, aes(x = Sample, y = Abundance, fill = Phylum)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(x = "Sample", y = "Relative Abundance (%)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +# 调整x轴标签角度
  scale_fill_manual(values = c("#67aecb", "#D0D4BE", "#8DB8A7", "#EDDE92", 
                               "#AE9CA3", "#D0B4BE", "#E4A09E", "#B6D6E4",
                               "#1f77b4", "grey", "#7f7f7f"))

###############
# defining URL
url <- "https://example.com/somefile.zip"

# defining address
destfile <- "D:output.word"


# use "curl" to download
download.file(url, destfile, method = "curl")