# ============================================================
# 1) 数据导入:读取原始月度汇总表
# ============================================================
df_raw <- read.xlsx("汇总.xlsx")
# 单位转换:元 -> 万元
df <- df_raw %>%
mutate(across(where(is.numeric) & !matches("月份|季度"), ~ . / 10000))
if (knitr::is_html_output()) {
# HTML: 使用 DT 展示完整数据
datatable(df,
caption = "数据预览(单位:万元)",
options = list(scrollX = TRUE, pageLength = 12)) %>%
formatRound(columns = sapply(df, is.numeric), digits = 2)
} else {
# PDF: 展示金额最多的前8列
# 1. 找出数值列
num_cols <- df %>% select(where(is.numeric)) %>% select(-matches("月份|季度|绩效"))
# 2. 计算列总和并排序
col_sums <- colSums(num_cols, na.rm = TRUE)
top_cols <- names(sort(col_sums, decreasing = TRUE))[1:min(8, length(col_sums))]
# 3. 组合最终展示列
show_cols <- c("月份", top_cols)
#
show_cols <- intersect(show_cols, names(df))
knitr::kable(df[, show_cols], digits = 2,
caption = "数据预览(收入最多的前8项,单位:万元)")
}# ============================================================
# 2) 年度总体汇总:全年总收入、全年绩效、绩效占比
# ============================================================
year_sum <- df %>%
summarise(
月数 = n(),
全年总收入 = sum(`总计`, na.rm = TRUE),
全年绩效 = sum(`绩效`, na.rm = TRUE),
绩效占比 = 全年绩效 / 全年总收入
) %>%
mutate(
绩效占比 = scales::percent(绩效占比, accuracy = 0.01)
)
if (knitr::is_html_output()) {
datatable(year_sum, caption = "年度总体数据(万元)", options = list(dom = 't')) %>%
formatRound(columns = c("全年总收入", "全年绩效"), digits = 2)
} else {
knitr::kable(year_sum, digits = 2, caption = "年度总体数据(万元)")
}# ============================================================
# 3) 月度收入趋势表
# ============================================================
trend_tbl <- df %>%
arrange(月份) %>%
transmute(
月份 = as.integer(月份),
总计 = `总计`,
绩效 = `绩效`,
绩效占比 = `绩效` / `总计`
)
trend_tbl_fmt <- trend_tbl %>%
mutate(
绩效占比 = scales::percent(绩效占比, accuracy = 0.01)
)
if (knitr::is_html_output()) {
datatable(trend_tbl_fmt, caption = "月度收入趋势表(万元)", options = list(pageLength = 12)) %>%
formatRound(columns = c("总计", "绩效"), digits = 2)
} else {
knitr::kable(trend_tbl_fmt, digits = 2, caption = "月度收入趋势表(万元)")
}# ============================================================
# 4) 月度收入趋势可视化:总计、绩效柱状图
# ============================================================
p_total_trend <- ggplot(trend_tbl, aes(x = factor(月份), y = 总计)) +
geom_col(fill = "#2C7FB8") +
scale_y_continuous(labels = scales::label_number(big.mark = ",", accuracy = 0.01)) +
labs(title = "月度总收入趋势", x = "月份", y = "总收入(万元)") +
theme_minimal(base_size = 12)
p_perf_trend <- ggplot(trend_tbl, aes(x = factor(月份), y = 绩效)) +
geom_col(fill = "#D95F0E") +
scale_y_continuous(labels = scales::label_number(big.mark = ",", accuracy = 0.01)) +
labs(title = "月度绩效趋势", x = "月份", y = "绩效(万元)") +
theme_minimal(base_size = 12)
if (knitr::is_html_output()) {
# HTML 下使用交互式图表
htmltools::tagList(ggplotly(p_total_trend), ggplotly(p_perf_trend))
} else {
# PDF 下使用静态图表
print(p_total_trend)
print(p_perf_trend)
}# ============================================================
# 5) 月度趋势对比(标准化后的结果)
# ============================================================
trend_norm <- trend_tbl %>%
mutate(
总计_z = (总计 - mean(总计, na.rm = TRUE)) / sd(总计, na.rm = TRUE),
绩效_z = (绩效 - mean(绩效, na.rm = TRUE)) / sd(绩效, na.rm = TRUE)
) %>%
pivot_longer(cols = c(总计_z, 绩效_z), names_to = "指标", values_to = "z")
p_trend_compare <- ggplot(trend_norm, aes(x = factor(月份), y = z, group = 指标, color = 指标)) +
geom_line(linewidth = 1) +
geom_point(size = 2) +
scale_color_manual(
values = c("总计_z" = "#2C7FB8", "绩效_z" = "#D95F0E"),
labels = c("总计_z" = "总收入", "绩效_z" = "绩效")
) +
labs(
title = "总收入 vs 绩效(月度趋势对比,标准化)",
x = "月份", y = "标准化值(z-score)", color = NULL
) +
theme_minimal(base_size = 12)
if (knitr::is_html_output()) {
ggplotly(p_trend_compare)
} else {
p_trend_compare
}# ============================================================
# 6) 季度汇总
# ============================================================
qtr_tbl <- df %>%
mutate(
月份 = as.integer(月份),
季度 = case_when(
月份 %in% 1:3 ~ "Q1",
月份 %in% 4:6 ~ "Q2",
月份 %in% 7:9 ~ "Q3",
月份 %in% 10:12 ~ "Q4",
TRUE ~ NA_character_
),
季度 = factor(季度, levels = c("Q1", "Q2", "Q3", "Q4"))
) %>%
filter(!is.na(季度)) %>%
group_by(季度) %>%
summarise(
总计 = sum(`总计`, na.rm = TRUE),
绩效 = sum(`绩效`, na.rm = TRUE),
绩效率 = 绩效 / 总计,
.groups = "drop"
)
qtr_tbl_fmt <- qtr_tbl %>%
mutate(
总收入 = 总计,
绩效 = 绩效,
绩效率 = scales::percent(绩效率, accuracy = 0.01)
) %>%
select(季度, 总收入, 绩效, 绩效率)
if (knitr::is_html_output()) {
datatable(qtr_tbl_fmt, caption = "季度汇总表(万元)", options = list(dom = 't')) %>%
formatRound(columns = c("总收入", "绩效"), digits = 2)
} else {
knitr::kable(qtr_tbl_fmt, digits = 2, caption = "季度汇总表(万元)")
}# ============================================================
# 7) 全年收入组成分析:剔除床位费,汇总各项目全年金额并计算占比
# ============================================================
exclude_cols <- c("月份", "总计", "绩效", "床位费总费用")
component_cols <- setdiff(names(df), exclude_cols)
total_year <- sum(df$`总计`, na.rm = TRUE)
comp_year <- df %>%
summarise(across(all_of(component_cols), ~ sum(.x, na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "项目", values_to = "全年金额") %>%
mutate(占比 = 全年金额 / total_year) %>%
arrange(desc(全年金额)) %>%
mutate(项目 = forcats::fct_reorder(项目, 全年金额, .desc = TRUE))
comp_year_fmt <- comp_year %>%
mutate(
占比 = scales::percent(占比, accuracy = 0.01)
)
if (knitr::is_html_output()) {
datatable(comp_year_fmt, caption = "全年收入结构(万元)", options = list(pageLength = 10)) %>%
formatRound(columns = "全年金额", digits = 2)
} else {
knitr::kable(comp_year_fmt, digits = 2, caption = "全年收入结构(万元)")
}p_comp_bar <- ggplot(comp_year, aes(x = 项目, y = 全年金额)) +
geom_col(fill = "#5B8FF9") +
coord_flip() +
geom_text(aes(label = scales::percent(占比, accuracy = 0.1)),
hjust = 0.98, size = 2.3, color = "white") +
scale_y_continuous(
labels = scales::label_number(big.mark = ",", accuracy = 1),
expand = expansion(mult = c(0, 0.35))
) +
labs(title = "全年收入组成(剔除床位费)", x = NULL, y = "全年金额(万元)") +
theme_minimal(base_size = 12) +
theme(plot.margin = ggplot2::margin(5.5, 30, 5.5, 5.5))
top_n <- 20
comp_top <- comp_year %>% slice_head(n = top_n)
p_comp_top <- ggplot(comp_top, aes(x = 项目, y = 全年金额)) +
geom_col(fill = "#5B8FF9") +
coord_flip() +
geom_text(aes(label = scales::percent(占比, accuracy = 0.1)),
hjust = 0.98, size = 2.3, color = "white") +
scale_y_continuous(
labels = scales::label_number(big.mark = ",", accuracy = 1),
expand = expansion(mult = c(0, 0.35))
) +
labs(title = paste0("全年收入组成 Top ", top_n, "(剔除床位费)"),
x = NULL, y = "全年金额(万元)") +
theme_minimal(base_size = 12) +
theme(plot.margin = ggplot2::margin(5.5, 30, 5.5, 5.5))
if (knitr::is_html_output()) {
htmltools::tagList(
ggplotly(p_comp_bar, width = 1100, height = 700),
ggplotly(p_comp_top, width = 1100, height = 700)
)
} else {
print(p_comp_bar)
print(p_comp_top)
}# ============================================================
# 9) 构造相关分析基础数据
# ============================================================
df_cor_base <- df %>%
select(-月份) %>%
relocate(绩效, .before = 1)
target <- "绩效"# ============================================================
# 10) Spearman:
# ============================================================
# 10.1 Spearman 相关矩阵
cor_mat_spear <- cor(df_cor_base, use = "pairwise.complete.obs", method = "spearman")
# 10.2 与绩效相关性排序条形图(Spearman)
r_vec_spear <- cor_mat_spear[target, ]
r_df_spear <- data.frame(var = names(r_vec_spear), r = as.numeric(r_vec_spear)) %>%
filter(var != target) %>%
arrange(desc(r))
p_bar_spear <- ggplot(r_df_spear, aes(x = reorder(var, r), y = r, fill = r)) +
geom_col() +
coord_flip() +
scale_fill_gradient2(low = "#2c7bb6", mid = "white", high = "#d7191c", midpoint = 0) +
labs(x = NULL, y = "Spearman 相关系数", title = "与绩效的相关性排序(Spearman)") +
theme_minimal(base_size = 12)
# 显示排序表格(表6)
if (knitr::is_html_output()) {
datatable(r_df_spear, caption = "Spearman 相关性排序", options = list(pageLength = 10)) %>%
formatRound(columns = "r", digits = 3)
} else {
knitr::kable(r_df_spear, digits = 3, caption = "Spearman 相关性排序")
}# 10.3 Spearman 的 P 值矩阵(逐对 cor.test)
vars_s <- names(df_cor_base)
p_mat_spear <- matrix(NA_real_, nrow = length(vars_s), ncol = length(vars_s),
dimnames = list(vars_s, vars_s))
for (i in seq_along(vars_s)) {
for (j in seq_along(vars_s)) {
xi <- df_cor_base[[vars_s[i]]]
xj <- df_cor_base[[vars_s[j]]]
ok <- is.finite(xi) & is.finite(xj)
if (sum(ok) >= 3) {
p_mat_spear[i, j] <- cor.test(xi[ok], xj[ok], method = "spearman", exact = FALSE)$p.value
}
}
}
# 10.4
cor_long_spear <- as.data.frame(cor_mat_spear) %>%
tibble::rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r")
p_long_spear <- as.data.frame(p_mat_spear) %>%
tibble::rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "p")
corp_long_spear <- cor_long_spear %>%
left_join(p_long_spear, by = c("Var1", "Var2")) %>%
mutate(
Var1 = factor(Var1, levels = rownames(cor_mat_spear)),
Var2 = factor(Var2, levels = colnames(cor_mat_spear)),
p_star = case_when(
is.na(p) ~ "",
p < 0.001 ~ "***",
p < 0.01 ~ "**",
p < 0.05 ~ "*",
p < 0.1 ~ ".",
TRUE ~ ""
),
label = paste0(sprintf("%.2f", r), p_star),
label = if_else(Var1 == Var2, "", label) # 隐藏对角线
)
# 10.5 Spearman 热图
p_heat_spear <- ggplot(corp_long_spear, aes(x = Var2, y = Var1, fill = r)) +
geom_tile(color = "white", linewidth = 0.2) +
geom_text(aes(label = label), size = 2.0, lineheight = 0.9) +
coord_fixed() +
scale_fill_gradient2(
low = "#2C7BB6", mid = "white", high = "#D7191C",
midpoint = 0, limits = c(-1, 1),
name = "Spearman r"
) +
labs(
title = "相关系数热图(Spearman)",
subtitle = "*** p<0.001,** p<0.01,* p<0.05,. p<0.1",
x = NULL, y = NULL
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 14),
axis.text.y = element_text(size = 14),
panel.grid = element_blank(),
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 11)
)
if (knitr::is_html_output()) {
htmltools::tagList(
ggplotly(p_bar_spear, width = 1100, height = 600),
ggplotly(p_heat_spear, width = 1100, height = 900)
)
} else {
print(p_bar_spear)
print(p_heat_spear)
}# ============================================================
# 11) Pearson:
# ============================================================
df1 <- df_cor_base %>%
select(-c(
"会诊费总费用", "麻醉费总费用", "其他总费用", "抢救费总费用", "诊查费总费用",
"床位费总费用", "中医外治", "手术仪器", "介入材料", "介入治疗", "配方食品"
))
# 11.1 Pearson 相关矩阵
cor_mat_pear <- cor(df1, use = "pairwise.complete.obs", method = "pearson")
# 11.2 与绩效相关性排序条形图(Pearson)
r_vec_pear <- cor_mat_pear[target, ]
r_df_pear <- data.frame(var = names(r_vec_pear), r = as.numeric(r_vec_pear)) %>%
filter(var != target) %>%
arrange(desc(r))
p_bar_pear <- ggplot(r_df_pear, aes(x = reorder(var, r), y = r, fill = r)) +
geom_col() +
coord_flip() +
scale_fill_gradient2(low = "#2c7bb6", mid = "white", high = "#d7191c", midpoint = 0) +
labs(x = NULL, y = "Pearson 相关系数", title = "与绩效的相关性排序(Pearson)") +
theme_minimal(base_size = 12)
# 显示排序表格(表7)
if (knitr::is_html_output()) {
datatable(r_df_pear, caption = "Pearson 相关性排序", options = list(pageLength = 10)) %>%
formatRound(columns = "r", digits = 3)
} else {
knitr::kable(r_df_pear, digits = 3, caption = "Pearson 相关性排序")
}# 11.3 Pearson 的 P 值矩阵(逐对 cor.test)
vars_p <- names(df1)
p_mat_pear <- matrix(NA_real_, nrow = length(vars_p), ncol = length(vars_p),
dimnames = list(vars_p, vars_p))
for (i in seq_along(vars_p)) {
for (j in seq_along(vars_p)) {
xi <- df1[[vars_p[i]]]
xj <- df1[[vars_p[j]]]
ok <- is.finite(xi) & is.finite(xj)
if (sum(ok) >= 3) {
p_mat_pear[i, j] <- cor.test(xi[ok], xj[ok], method = "pearson")$p.value
}
}
}
# 11.4
cor_long_pear <- as.data.frame(cor_mat_pear) %>%
tibble::rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "r")
p_long_pear <- as.data.frame(p_mat_pear) %>%
tibble::rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "p")
corp_long_pear <- cor_long_pear %>%
left_join(p_long_pear, by = c("Var1", "Var2")) %>%
mutate(
Var1 = factor(Var1, levels = rownames(cor_mat_pear)),
Var2 = factor(Var2, levels = colnames(cor_mat_pear)),
p_star = case_when(
is.na(p) ~ "",
p < 0.001 ~ "***",
p < 0.01 ~ "**",
p < 0.05 ~ "*",
p < 0.1 ~ ".",
TRUE ~ ""
),
label = paste0(sprintf("%.2f", r), p_star),
label = if_else(Var1 == Var2, "", label)
)
# 11.5 Pearson 热图
p_heat_pear <- ggplot(corp_long_pear, aes(x = Var2, y = Var1, fill = r)) +
geom_tile(color = "white", linewidth = 0.2) +
geom_text(aes(label = label), size = 2.0, lineheight = 0.9) +
coord_fixed() +
scale_fill_gradient2(
low = "#2C7BB6", mid = "white", high = "#D7191C",
midpoint = 0, limits = c(-1, 1),
name = "Pearson r"
) +
labs(
title = "相关系数热图(Pearson)",
subtitle = "*** p<0.001,** p<0.01,* p<0.05,. p<0.1",
x = NULL, y = NULL
) +
theme_minimal(base_size = 12) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 14),
axis.text.y = element_text(size = 14),
panel.grid = element_blank(),
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 11)
)
if (knitr::is_html_output()) {
htmltools::tagList(
ggplotly(p_bar_pear, width = 1100, height = 600),
ggplotly(p_heat_pear, width = 1100, height = 900)
)
} else {
# PDF 下使用静态图表
print(p_bar_pear)
print(p_heat_pear)
}