library(ggplot2)
library(ggpubr)
library(ggsignif)
# Data preparation: keep only the rows whose category is one of the six
# cuisines being compared.
main_cuisines <- c("浙菜", "粤菜", "西餐", "烧烤", "火锅", "海鲜")
plot_data <- 上海美食商铺[is.element(上海美食商铺$类别, main_cuisines), ]

# Pairwise comparisons to annotate on the plot (pairs chosen for their
# business-analysis value).
compared <- list(
  c("西餐", "浙菜"),
  c("西餐", "粤菜"),
  c("火锅", "海鲜"),
  c("烧烤", "海鲜")
)
# Build the figure: violin + box plots of the taste score per cuisine
# category, annotated with significance tests, per-group means and an
# overall-mean reference line, then render it.
p <- ggplot(plot_data, aes(x = 类别, y = 口味, fill = 类别)) +
  # Geometry: untrimmed violins with narrow box plots layered on top.
  geom_violin(alpha = 0.7, trim = FALSE, width = 0.8) +
  geom_boxplot(
    width = 0.15,
    outlier.shape = 21,
    outlier.size = 2,
    outlier.fill = "white",
    show.legend = FALSE
  ) +
  # Statistical tests: pairwise Wilcoxon tests for the selected pairs,
  # shown as significance symbols on brackets.
  stat_compare_means(
    comparisons = compared,
    method = "wilcox.test",
    label = "p.signif",
    tip.length = 0.01,
    size = 4,
    vjust = 0.5
  ) +
  # Global Kruskal-Wallis test label at a fixed position.
  stat_compare_means(
    method = "kruskal.test",
    label.y = 9.5,
    label.x = 1.5
  ) +
  # Visual styling: six manual fill colours, one per selected cuisine.
  scale_fill_manual(
    values = c(
      "#4E79A7", "#F28E2B", "#E15759",
      "#76B7B2", "#59A14F", "#EDC948"
    )
  ) +
  labs(
    title = "上海主要菜系口味评分对比分析",
    subtitle = "箱线图展示中位数与离群值,小提琴图显示概率密度分布",
    x = NULL,
    y = "口味评分(10分制)",
    # The symbol legend lists all five levels: ggpubr's default "p.signif"
    # mapping also produces "ns" and "****", not just one to three stars.
    caption = paste0(
      "数据来源:上海餐饮平台 | ",
      "显著性检验:Kruskal-Wallis检验+Wilcoxon秩和检验\n",
      "ns p>=0.05, * p<0.05, ** p<0.01, *** p<0.001, **** p<0.0001"
    )
  ) +
  # Theme customisation: the fill legend is hidden because the x axis
  # already names each category.
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1, face = "bold"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 10, hjust = 0.5, color = "gray40"),
    panel.grid.major.x = element_blank(),
    plot.caption = element_text(size = 8, color = "gray50", hjust = 0)
  ) +
  # Per-category mean marker (diamond, white fill, red outline).
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 23,
    size = 3,
    fill = "white",
    color = "red"
  ) +
  # Reference line at the overall mean score. na.rm = TRUE keeps the line
  # from vanishing (NA intercept) when some scores are missing.
  geom_hline(
    yintercept = mean(plot_data$口味, na.rm = TRUE),
    linetype = "dashed",
    color = "gray50",
    alpha = 0.5
  )

print(p)