options(repos = c(CRAN = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
install.packages("phyloseq")
## Warning: package 'phyloseq' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
install.packages("vegan")
## package 'vegan' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\86183\AppData\Local\Temp\RtmpK4U4yF\downloaded_packages
install.packages("ggplot2")
## package 'ggplot2' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\86183\AppData\Local\Temp\RtmpK4U4yF\downloaded_packages
library(phyloseq)
library(vegan)
## Warning: package 'vegan' was built under R version 4.4.3
## Loading required package: permute
## Loading required package: lattice
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 输入OTU表格(不等重复)
otu_data <- read.csv("C:/data1/group_modified.csv", header = TRUE, row.names = 1)
# 提取OTU丰度数据
otu_matrix <- otu_data[, 1:13] # 假设前十三列是样本丰度
colnames(otu_matrix) <- c("BFO1", "BFO2", "BFO3", "BFO4", "BFOH2", "BFOH3", "RFO1", "RFO2", "RFO3", "RFO4", "RFOH1", "RFOH3","RFOH4") # 样本名称
sample_data <- data.frame(
SampleID = c("BFO1","BFO2","BFO3","BFO4",
"BFOH2","BFOH3",
"RFO1","RFO2","RFO3","RFO4",
"RFOH1","RFOH3","RFOH4"),
Group = rep(c("BFO","BFOH","RFO","RFOH"),
times = c(4,2,4,3)),
stringsAsFactors = FALSE
)
# 设置行名
rownames(sample_data) <- sample_data$SampleID
# 查看创建的数据
head(sample_data)
## SampleID Group
## BFO1 BFO1 BFO
## BFO2 BFO2 BFO
## BFO3 BFO3 BFO
## BFO4 BFO4 BFO
## BFOH2 BFOH2 BFOH
## BFOH3 BFOH3 BFOH
#创建样本(分组)信息
SAM <- sample_data(sample_data)
# 提取分类信息
tax_data <- otu_data[, 14] # 第十四列是分类信息
tax_strings <- as.character(tax_data) # 确保转换为字符类型
tax_matrix <- matrix(NA, nrow = length(tax_strings), ncol = 7)
colnames(tax_matrix) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
for (i in 1:length(tax_strings)) {
# 按逗号拆分,并去除空白字符
split_tax <- trimws(unlist(strsplit(tax_strings[i], ",")))
# 提取每个层级的名称(去掉前缀如 "k:")
for (j in 1:length(split_tax)) {
level <- unlist(strsplit(split_tax[j], ":"))[1] # 如 "k", "p", "c"...
value <- unlist(strsplit(split_tax[j], ":"))[2] # 如 "Bacteria", "Proteobacteria"...
# 根据前缀匹配到对应的列
if (level == "k") tax_matrix[i, "Kingdom"] <- value
if (level == "p") tax_matrix[i, "Phylum"] <- value
if (level == "c") tax_matrix[i, "Class"] <- value
if (level == "o") tax_matrix[i, "Order"] <- value
if (level == "f") tax_matrix[i, "Family"] <- value
if (level == "g") tax_matrix[i, "Genus"] <- value
if (level == "s") tax_matrix[i, "Species"] <- value
}
}
# 设置行名为OTU名称
rownames(tax_matrix) <- rownames(otu_data)
# 检查结果
head(tax_matrix[, "Phylum"])
## OTU_10000 OTU_10001 OTU_10002
## "Proteobacteria" "unclassified_k_Bacteria" "Bacteroidota"
## OTU_10003 OTU_10004 OTU_1000
## "unclassified_k_Bacteria" "unclassified_k_Bacteria" "unclassified_k_Bacteria"
# 处理部分未分类结果
tax_matrix[tax_matrix == "unclassified_k_Bacteria"] <- "unclassified"
#tax_matrix[tax_matrix == "unclassified_p_Proteobacteria"] <- "unclassified"
# 转换为phyloseq对象
TAX <- tax_table(tax_matrix)
OTU <- otu_table(otu_matrix, taxa_are_rows = TRUE)
physeq <- phyloseq(OTU, TAX, SAM)
#########################
# alpha diversity
library(tidyr)
# 计算α多样性指数
alpha_div <- estimate_richness(physeq, measures = c("Chao1", "Shannon", "Simpson"))
# 添加分组信息
meta_data <- data.frame(sample_data(physeq))
alpha_div$SampleID <- rownames(alpha_div)
alpha_div <- merge(alpha_div, meta_data, by.x = "SampleID", by.y = "row.names")
head(alpha_div)
## SampleID Chao1 se.chao1 Shannon Simpson SampleID.y Group
## 1 BFO1 2741.227 32.30895 5.169566 0.9744869 BFO1 BFO
## 2 BFO2 3265.002 33.95917 5.243843 0.9717939 BFO2 BFO
## 3 BFO3 2928.869 35.74525 5.123405 0.9674471 BFO3 BFO
## 4 BFO4 3756.750 28.35577 5.632139 0.9830748 BFO4 BFO
## 5 BFOH2 3159.353 34.43258 5.351750 0.9775836 BFOH2 BFOH
## 6 BFOH3 3305.828 30.34769 5.354603 0.9746705 BFOH3 BFOH
alpha_long <- alpha_div %>%
select(SampleID, Group, Chao1, Shannon, Simpson) %>%
pivot_longer(cols = c(Chao1, Shannon, Simpson), names_to = "Index", values_to = "Value")
# 将指数名称转为因子并控制绘图顺序
alpha_long$Index <- factor(alpha_long$Index, levels = c("Chao1", "Shannon", "Simpson"))
# 自定配色方案
my_colors <- c("#D0B4BE", "#B6D6E4", "#8DB8A7", "#EDDE92") # 按分组数量调整
# 绘制分面箱线图
p_combined <- ggplot(alpha_long, aes(x = Group, y = Value, fill = Group)) +
geom_boxplot(alpha = 0.8) +
geom_jitter(width = 0.2, size = 1.5, alpha = 0.5) +
facet_wrap(~Index, scales = "free_y", nrow = 1) + # 横向排列
labs(x = "", y = "Alpha Diversity Value") +
theme_bw() +
scale_fill_manual(values = my_colors) +
scale_color_manual(values = my_colors) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "right"
)
print(p_combined)

#######################
# beta diversity
if (!require("ape")) install.packages("ape") # 用于PCoA分析
## Loading required package: ape
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
library(ape)
# 计算距离矩阵
dist_matrix <- phyloseq::distance(physeq, method = "bray") # 或 "unifrac"(如果有树)
# 执行PCoA
pcoa_result <- ape::pcoa(dist_matrix)
# 提取主坐标并合并样本信息
pcoa_scores <- as.data.frame(pcoa_result$vectors[, 1:2]) # 取前两轴
colnames(pcoa_scores) <- c("PCoA1", "PCoA2")
pcoa_scores$SampleID <- rownames(pcoa_scores)
pcoa_data <- merge(pcoa_scores,
data.frame(sample_data(physeq)),
by.x = "SampleID",
by.y = "row.names")
# 自定义颜色和形状(确保组间可区分)
color <- c("#D0B4BE", "#B6D6E4", "#8DB8A7", "#EDDE92") # 按分组数量调整
shapes <- c(16, 17, 15, 18) # 不同形状区分组别
# 绘制PCoA图形
ggplot(pcoa_data, aes(x = PCoA1, y = PCoA2,
color = Group, shape = Group)) +
geom_point(size = 4, alpha = 0.8) +
stat_ellipse(data = subset(pcoa_data, Group %in% c("BFO", "RFO")), # 只对A、B组画椭圆
level = 0.8, linetype = 2, linewidth = 0.5) +
scale_color_manual(values = color) +
scale_shape_manual(values = shapes) +
labs(x = paste0("PCoA1 (", round(pcoa_result$values$Relative_eig[1] * 100, 1), "%)"),
y = paste0("PCoA2 (", round(pcoa_result$values$Relative_eig[2] * 100, 1), "%)"),
title = "Beta Diversity PCoA (Bray-Curtis Distance)") +
theme_bw() +
theme(legend.position = "right") +
annotate("text", x = Inf, y = Inf,
label = paste("BFO: n=4\nBFOH: n=2\nRFO: n=4\nRFOH: n=3"),
hjust = 1.1, vjust = 1.1)

###############
# defining URL
url <- "https://example.com/somefile.zip"
# defining address
destfile <- "D:output.word"
# use "curl" to download
download.file(url, destfile, method = "curl")