1 环境准备

2 数据导入与检查

数据为急诊+ICU1部+ICU2部的汇总数据。
# ============================================================
# 1) 数据导入:读取原始月度汇总表
# ============================================================
df_raw <- read.xlsx("汇总.xlsx")

# 单位转换:元 -> 万元
df <- df_raw %>%
  mutate(across(where(is.numeric) & !matches("月份|季度"), ~ . / 10000))

if (knitr::is_html_output()) {
  # HTML: 使用 DT 展示完整数据
  datatable(df, 
            caption = "数据预览(单位:万元)",
            options = list(scrollX = TRUE, pageLength = 12)) %>%
    formatRound(columns = sapply(df, is.numeric), digits = 2)
} else {
  # PDF: 展示金额最多的前8列
  # 1. 找出数值列
  num_cols <- df %>% select(where(is.numeric)) %>% select(-matches("月份|季度|绩效"))
  # 2. 计算列总和并排序
  col_sums <- colSums(num_cols, na.rm = TRUE)
  top_cols <- names(sort(col_sums, decreasing = TRUE))[1:min(8, length(col_sums))]
  # 3. 组合最终展示列
  show_cols <- c("月份", top_cols)
  # 
  show_cols <- intersect(show_cols, names(df))
  
  knitr::kable(df[, show_cols], digits = 2, 
               caption = "数据预览(收入最多的前8项,单位:万元)")
}

3 年度总体汇总

# ============================================================
# 2) 年度总体汇总:全年总收入、全年绩效、绩效占比
# ============================================================
year_sum <- df %>%
  summarise(
    月数 = n(),
    全年总收入 = sum(`总计`, na.rm = TRUE),
    全年绩效 = sum(`绩效`, na.rm = TRUE),
    绩效占比 = 全年绩效 / 全年总收入
  ) %>%
  mutate(
    绩效占比 = scales::percent(绩效占比, accuracy = 0.01)
  )

if (knitr::is_html_output()) {
  datatable(year_sum, caption = "年度总体数据(万元)", options = list(dom = 't')) %>%
     formatRound(columns = c("全年总收入", "全年绩效"), digits = 2)
} else {
  knitr::kable(year_sum, digits = 2, caption = "年度总体数据(万元)")
}

4 月度收入趋势分析

# ============================================================
# 3) 月度收入趋势表 
# ============================================================
trend_tbl <- df %>%
  arrange(月份) %>%
  transmute(
    月份 = as.integer(月份),
    总计 = `总计`,
    绩效 = `绩效`,
    绩效占比 = `绩效` / `总计`
  )

trend_tbl_fmt <- trend_tbl %>%
  mutate(
    绩效占比 = scales::percent(绩效占比, accuracy = 0.01)
  )

if (knitr::is_html_output()) {
  datatable(trend_tbl_fmt, caption = "月度收入趋势表(万元)", options = list(pageLength = 12)) %>%
     formatRound(columns = c("总计", "绩效"), digits = 2)
} else {
  knitr::kable(trend_tbl_fmt, digits = 2, caption = "月度收入趋势表(万元)")
}
# ============================================================
# 4) 月度收入趋势可视化:总计、绩效柱状图
# ============================================================

p_total_trend <- ggplot(trend_tbl, aes(x = factor(月份), y = 总计)) +
  geom_col(fill = "#2C7FB8") +
  scale_y_continuous(labels = scales::label_number(big.mark = ",", accuracy = 0.01)) +
  labs(title = "月度总收入趋势", x = "月份", y = "总收入(万元)") +
  theme_minimal(base_size = 12)

p_perf_trend <- ggplot(trend_tbl, aes(x = factor(月份), y = 绩效)) +
  geom_col(fill = "#D95F0E") +
  scale_y_continuous(labels = scales::label_number(big.mark = ",", accuracy = 0.01)) +
  labs(title = "月度绩效趋势", x = "月份", y = "绩效(万元)") +
  theme_minimal(base_size = 12)

if (knitr::is_html_output()) {
  # HTML 下使用交互式图表
  htmltools::tagList(ggplotly(p_total_trend), ggplotly(p_perf_trend))
} else {
  # PDF 下使用静态图表
  print(p_total_trend)
  print(p_perf_trend)
}
# ============================================================
# 5) 月度趋势对比(标准化后的结果)
# ============================================================
trend_norm <- trend_tbl %>%
  mutate(
    总计_z = (总计 - mean(总计, na.rm = TRUE)) / sd(总计, na.rm = TRUE),
    绩效_z = (绩效 - mean(绩效, na.rm = TRUE)) / sd(绩效, na.rm = TRUE)
  ) %>%
  pivot_longer(cols = c(总计_z, 绩效_z), names_to = "指标", values_to = "z")

p_trend_compare <- ggplot(trend_norm, aes(x = factor(月份), y = z, group = 指标, color = 指标)) +
  geom_line(linewidth = 1) +
  geom_point(size = 2) +
  scale_color_manual(
    values = c("总计_z" = "#2C7FB8", "绩效_z" = "#D95F0E"),
    labels = c("总计_z" = "总收入", "绩效_z" = "绩效")
  ) +
  labs(
    title = "总收入 vs 绩效(月度趋势对比,标准化)",
    x = "月份", y = "标准化值(z-score)", color = NULL
  ) +
  theme_minimal(base_size = 12)

if (knitr::is_html_output()) {
  ggplotly(p_trend_compare)
} else {
  p_trend_compare
}

5 季度汇总

# ============================================================
# 6) 季度汇总
# ============================================================
qtr_tbl <- df %>%
  mutate(
    月份 = as.integer(月份),
    季度 = case_when(
      月份 %in% 1:3   ~ "Q1",
      月份 %in% 4:6   ~ "Q2",
      月份 %in% 7:9   ~ "Q3",
      月份 %in% 10:12 ~ "Q4",
      TRUE ~ NA_character_
    ),
    季度 = factor(季度, levels = c("Q1", "Q2", "Q3", "Q4"))
  ) %>%
  filter(!is.na(季度)) %>%
  group_by(季度) %>%
  summarise(
    总计 = sum(`总计`, na.rm = TRUE),
    绩效 = sum(`绩效`, na.rm = TRUE),
    绩效率 = 绩效 / 总计,
    .groups = "drop"
  )

qtr_tbl_fmt <- qtr_tbl %>%
  mutate(
    总收入 = 总计,
    绩效 = 绩效,
    绩效率 = scales::percent(绩效率, accuracy = 0.01)
  ) %>%
  select(季度, 总收入, 绩效, 绩效率)

if (knitr::is_html_output()) {
  datatable(qtr_tbl_fmt, caption = "季度汇总表(万元)", options = list(dom = 't')) %>%
     formatRound(columns = c("总收入", "绩效"), digits = 2)
} else {
  knitr::kable(qtr_tbl_fmt, digits = 2, caption = "季度汇总表(万元)")
}

6 全年收入结构

床位费为负值,此处剔除床位费。
# ============================================================
# 7) 全年收入组成分析:剔除床位费,汇总各项目全年金额并计算占比
# ============================================================
exclude_cols <- c("月份", "总计", "绩效", "床位费总费用")
component_cols <- setdiff(names(df), exclude_cols)

total_year <- sum(df$`总计`, na.rm = TRUE)

comp_year <- df %>%
  summarise(across(all_of(component_cols), ~ sum(.x, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "项目", values_to = "全年金额") %>%
  mutate(占比 = 全年金额 / total_year) %>%
  arrange(desc(全年金额)) %>%
  mutate(项目 = forcats::fct_reorder(项目, 全年金额, .desc = TRUE))

comp_year_fmt <- comp_year %>%
  mutate(
    占比 = scales::percent(占比, accuracy = 0.01)
  )

if (knitr::is_html_output()) {
  datatable(comp_year_fmt, caption = "全年收入结构(万元)", options = list(pageLength = 10)) %>%
     formatRound(columns = "全年金额", digits = 2)
} else {
  knitr::kable(comp_year_fmt, digits = 2, caption = "全年收入结构(万元)")
}
p_comp_bar <- ggplot(comp_year, aes(x = 项目, y = 全年金额)) +
  geom_col(fill = "#5B8FF9") +
  coord_flip() +
  geom_text(aes(label = scales::percent(占比, accuracy = 0.1)),
            hjust = 0.98, size = 2.3, color = "white") +
  scale_y_continuous(
    labels = scales::label_number(big.mark = ",", accuracy = 1),
    expand = expansion(mult = c(0, 0.35))
  ) +
  labs(title = "全年收入组成(剔除床位费)", x = NULL, y = "全年金额(万元)") +
  theme_minimal(base_size = 12) +
  theme(plot.margin = ggplot2::margin(5.5, 30, 5.5, 5.5))

top_n <- 20
comp_top <- comp_year %>% slice_head(n = top_n)

p_comp_top <- ggplot(comp_top, aes(x = 项目, y = 全年金额)) +
  geom_col(fill = "#5B8FF9") +
  coord_flip() +
  geom_text(aes(label = scales::percent(占比, accuracy = 0.1)),
            hjust = 0.98, size = 2.3, color = "white") +
  scale_y_continuous(
    labels = scales::label_number(big.mark = ",", accuracy = 1),
    expand = expansion(mult = c(0, 0.35))
  ) +
  labs(title = paste0("全年收入组成 Top ", top_n, "(剔除床位费)"),
       x = NULL, y = "全年金额(万元)") +
  theme_minimal(base_size = 12) +
  theme(plot.margin = ggplot2::margin(5.5, 30, 5.5, 5.5))

if (knitr::is_html_output()) {
  htmltools::tagList(
    ggplotly(p_comp_bar, width = 1100, height = 700),
    ggplotly(p_comp_top, width = 1100, height = 700)
  )
} else {
  print(p_comp_bar)
  print(p_comp_top)
}

7 相关性分析:数据准备

# ============================================================
# 9) 构造相关分析基础数据 
# ============================================================
df_cor_base <- df %>%
  select(-月份) %>%
  relocate(绩效, .before = 1)

target <- "绩效"

8 Spearman 相关分析

分析哪些项目的收入与绩效收入最相关,此处包含所有的项目。
# ============================================================
# 10) Spearman:
# ============================================================

# 10.1 Spearman 相关矩阵
cor_mat_spear <- cor(df_cor_base, use = "pairwise.complete.obs", method = "spearman")

# 10.2 与绩效相关性排序条形图(Spearman)
r_vec_spear <- cor_mat_spear[target, ]
r_df_spear <- data.frame(var = names(r_vec_spear), r = as.numeric(r_vec_spear)) %>%
  filter(var != target) %>%
  arrange(desc(r))

p_bar_spear <- ggplot(r_df_spear, aes(x = reorder(var, r), y = r, fill = r)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient2(low = "#2c7bb6", mid = "white", high = "#d7191c", midpoint = 0) +
  labs(x = NULL, y = "Spearman 相关系数", title = "与绩效的相关性排序(Spearman)") +
  theme_minimal(base_size = 12)

# 显示排序表格(表6)
if (knitr::is_html_output()) {
  datatable(r_df_spear, caption = "Spearman 相关性排序", options = list(pageLength = 10)) %>%
     formatRound(columns = "r", digits = 3)
} else {
  knitr::kable(r_df_spear, digits = 3, caption = "Spearman 相关性排序")
}
# 10.3 Spearman 的 P 值矩阵(逐对 cor.test)
vars_s <- names(df_cor_base)
p_mat_spear <- matrix(NA_real_, nrow = length(vars_s), ncol = length(vars_s),
                      dimnames = list(vars_s, vars_s))

for (i in seq_along(vars_s)) {
  for (j in seq_along(vars_s)) {
    xi <- df_cor_base[[vars_s[i]]]
    xj <- df_cor_base[[vars_s[j]]]
    ok <- is.finite(xi) & is.finite(xj)

    if (sum(ok) >= 3) {
      p_mat_spear[i, j] <- cor.test(xi[ok], xj[ok], method = "spearman", exact = FALSE)$p.value
    }
  }
}

# 10.4 
cor_long_spear <- as.data.frame(cor_mat_spear) %>%
  tibble::rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

p_long_spear <- as.data.frame(p_mat_spear) %>%
  tibble::rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "p")

corp_long_spear <- cor_long_spear %>%
  left_join(p_long_spear, by = c("Var1", "Var2")) %>%
  mutate(
    Var1 = factor(Var1, levels = rownames(cor_mat_spear)),
    Var2 = factor(Var2, levels = colnames(cor_mat_spear)),
    p_star = case_when(
      is.na(p)  ~ "",
      p < 0.001 ~ "***",
      p < 0.01  ~ "**",
      p < 0.05  ~ "*",
      p < 0.1   ~ ".",
      TRUE      ~ ""
    ),
    label = paste0(sprintf("%.2f", r), p_star),
    label = if_else(Var1 == Var2, "", label)   # 隐藏对角线
  )

# 10.5 Spearman 热图
p_heat_spear <- ggplot(corp_long_spear, aes(x = Var2, y = Var1, fill = r)) +
  geom_tile(color = "white", linewidth = 0.2) +
  geom_text(aes(label = label), size = 2.0, lineheight = 0.9) +
  coord_fixed() +
  scale_fill_gradient2(
    low = "#2C7BB6", mid = "white", high = "#D7191C",
    midpoint = 0, limits = c(-1, 1),
    name = "Spearman r"
  ) +
  labs(
    title = "相关系数热图(Spearman)",
    subtitle = "*** p<0.001,** p<0.01,* p<0.05,. p<0.1",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    panel.grid = element_blank(),
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11)
  )

if (knitr::is_html_output()) {
  htmltools::tagList(
    ggplotly(p_bar_spear, width = 1100, height = 600),
    ggplotly(p_heat_spear, width = 1100, height = 900)
  )
} else {
  print(p_bar_spear)
  print(p_heat_spear)
}

9 Pearson 相关分析

剔除了每月收入不足1万元的一些项目,以及中医外治这种存在好几个月份收入为0的项目,因此这个结果可能更符合实际。
# ============================================================
# 11) Pearson:
# ============================================================
df1 <- df_cor_base %>%
  select(-c(
    "会诊费总费用", "麻醉费总费用", "其他总费用", "抢救费总费用", "诊查费总费用",
    "床位费总费用", "中医外治", "手术仪器", "介入材料", "介入治疗", "配方食品"
  ))

# 11.1 Pearson 相关矩阵
cor_mat_pear <- cor(df1, use = "pairwise.complete.obs", method = "pearson")

# 11.2 与绩效相关性排序条形图(Pearson)
r_vec_pear <- cor_mat_pear[target, ]
r_df_pear <- data.frame(var = names(r_vec_pear), r = as.numeric(r_vec_pear)) %>%
  filter(var != target) %>%
  arrange(desc(r))

p_bar_pear <- ggplot(r_df_pear, aes(x = reorder(var, r), y = r, fill = r)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient2(low = "#2c7bb6", mid = "white", high = "#d7191c", midpoint = 0) +
  labs(x = NULL, y = "Pearson 相关系数", title = "与绩效的相关性排序(Pearson)") +
  theme_minimal(base_size = 12)

# 显示排序表格(表7)
if (knitr::is_html_output()) {
  datatable(r_df_pear, caption = "Pearson 相关性排序", options = list(pageLength = 10)) %>%
     formatRound(columns = "r", digits = 3)
} else {
  knitr::kable(r_df_pear, digits = 3, caption = "Pearson 相关性排序")
}
# 11.3 Pearson 的 P 值矩阵(逐对 cor.test)
vars_p <- names(df1)
p_mat_pear <- matrix(NA_real_, nrow = length(vars_p), ncol = length(vars_p),
                     dimnames = list(vars_p, vars_p))

for (i in seq_along(vars_p)) {
  for (j in seq_along(vars_p)) {
    xi <- df1[[vars_p[i]]]
    xj <- df1[[vars_p[j]]]
    ok <- is.finite(xi) & is.finite(xj)

    if (sum(ok) >= 3) {
      p_mat_pear[i, j] <- cor.test(xi[ok], xj[ok], method = "pearson")$p.value
    }
  }
}

# 11.4 
cor_long_pear <- as.data.frame(cor_mat_pear) %>%
  tibble::rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "r")

p_long_pear <- as.data.frame(p_mat_pear) %>%
  tibble::rownames_to_column("Var1") %>%
  pivot_longer(-Var1, names_to = "Var2", values_to = "p")

corp_long_pear <- cor_long_pear %>%
  left_join(p_long_pear, by = c("Var1", "Var2")) %>%
  mutate(
    Var1 = factor(Var1, levels = rownames(cor_mat_pear)),
    Var2 = factor(Var2, levels = colnames(cor_mat_pear)),
    p_star = case_when(
      is.na(p)  ~ "",
      p < 0.001 ~ "***",
      p < 0.01  ~ "**",
      p < 0.05  ~ "*",
      p < 0.1   ~ ".",
      TRUE      ~ ""
    ),
    label = paste0(sprintf("%.2f", r), p_star),
    label = if_else(Var1 == Var2, "", label)
  )

# 11.5 Pearson 热图
p_heat_pear <- ggplot(corp_long_pear, aes(x = Var2, y = Var1, fill = r)) +
  geom_tile(color = "white", linewidth = 0.2) +
  geom_text(aes(label = label), size = 2.0, lineheight = 0.9) +
  coord_fixed() +
  scale_fill_gradient2(
    low = "#2C7BB6", mid = "white", high = "#D7191C",
    midpoint = 0, limits = c(-1, 1),
    name = "Pearson r"
  ) +
  labs(
    title = "相关系数热图(Pearson)",
    subtitle = "*** p<0.001,** p<0.01,* p<0.05,. p<0.1",
    x = NULL, y = NULL
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 14),
    axis.text.y = element_text(size = 14),
    panel.grid = element_blank(),
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 11)
  )

if (knitr::is_html_output()) {
  htmltools::tagList(
    ggplotly(p_bar_pear, width = 1100, height = 600),
    ggplotly(p_heat_pear, width = 1100, height = 900)
  )
} else {
  # PDF 下使用静态图表
  print(p_bar_pear)
  print(p_heat_pear)
}
这个结果可以看出,治疗费、自制药、氧气费、护理费、中草药、检查费是和绩效收入相关性比较高的项目。由于数据太少,只有12个月的, 因此只能得出大概的结果,像中医特殊疗法这一项,最后三个月的收入为0,因此统计之后就可能产生与预期不符的情况,但总体趋势应该是对的。