1 基本想法

  • 我们想同时描述“温度—死亡的非线性关系”和“效应随时间延迟的分布”,因此用到 DLNM
  • 做法上:先用 crossbasis() 把温度×滞后变成一组新变量,然后在 (类)Poisson 的 GLM 框架中拟合,最后用 crosspred()crossreduce() 还原为直观图形。

2 分布滞后模型 vs. 自回归模型

  • 分布滞后模型(DLM):解释变量是 \(X_{t}, X_{t-1},\ldots,X_{t-s}\)
  • 自回归模型(AR):解释变量是被解释变量自身 \(Y_{t-1},\ldots,Y_{t-q}\)
  • DLM 可线性,但现实中的暴露–反应常非线性,于是需要 DLNM

3 小史与适用场景

  • 2006 由 Armstrong 引入到流行病学;2010 起 Gasparrini 等系统化 crossbasis 理论与实现;之后扩展到惩罚样条、降维合并与 meta 分析等。
  • 典型应用:温度/空气污染 ↔︎ 即时与延迟的健康效应(死亡、就诊、住院等)。

4 模型结构

\[ Y_t \sim \text{Poisson/Quasi-Poisson}(\mu_t), \quad \log \mu_t = \alpha + \underbrace{cb(X_t,\ldots,X_{t-L})}_{\text{cross-basis}} + s(\text{time}) + \text{dow} + \cdots \]

其中 cb()crossbasis() 产生,s(time) 常用样条控制季节/长期趋势。

library(dlnm); library(ggplot2); library(mice); library(reshape2); library(splines)

5 数据与预处理(ChicagoNMMAPS)

# 调用芝加哥数据(来自 dlnm 包)
data("chicagoNMMAPS")
data <- as.data.frame(chicagoNMMAPS)

# —— 数据预处理 ——
data[data$death > 200, 'death'] <- NA      # 离群值 -> NA
data[data$cvd   > 100, 'cvd']   <- NA
data[data$resp  > 200, 'resp']  <- NA

# 生成两日间温差序列
data$temp_dif <- c(NA, diff(data$temp))

# mice 插补
miceMod <- mice(data, printFlag = FALSE)
data    <- complete(miceMod)

# 进一步把高值 pm10 标为 NA 并再次插补
data[data$pm10 > 150, 'pm10'] <- NA
miceMod <- mice(data, printFlag = FALSE)
data    <- complete(miceMod)

5.1 总览图

plot_varible <- c("death", "cvd", "resp", "temp", "temp_dif", "pm10", "o3")
plot_data <- melt(data, id=c('date'), measure.vars = plot_varible)

plot_data$variable <- factor(
  plot_data$variable,
  levels = c("death","cvd","resp","temp","temp_dif","pm10","o3"),
  labels = c('死亡人数','心血管疾病致死','呼吸道疾病致死','平均温度','两天间温度差',
             expression(paste(PM[10], " (", mu, "g/", m^3, ")")),
             expression(paste(O[3],  " (", mu, "g/", m^3, ")")))
)

# 统一色板(7色),配合 facet 独立 y 轴
cols7 <- c("#44546A","#1F77B4","#2CA02C","#D62728","#9467BD","#8C564B","#17BECF")

ggplot(plot_data, aes(x = as.factor(date), y = value, color = variable, fill = variable)) +
  geom_point(alpha=.45, shape=16, size=.7, stroke=0) +
  scale_color_manual(values = cols7) +
  scale_fill_manual(values  = cols7) +
  scale_y_continuous(name = NULL) +
  scale_x_discrete(name = "Time",
                   breaks = c('1988-01-01','1990-01-01','1992-01-01','1994-01-01',
                              '1996-01-01','1998-01-01','2000-01-01')) +
  theme_minimal(base_size = 10) +
  theme(legend.position = "none",
        panel.grid.minor = element_blank(),
        strip.text = element_text(face="bold"),
        axis.text.x = element_text(angle = 0, vjust=.9, hjust=.9)) +
  facet_wrap(~variable, ncol=1, scales='free', labeller = label_parsed)

5.2 温度–死亡散点

ggplot(data, aes(x = temp, y = death)) +
  geom_point(alpha=.35, size=1.1, color="#2B8CBE") +
  labs(x = "Temperature (°C)", y = "Death") +
  theme_minimal(base_size = 11) +
  theme(panel.grid.minor = element_blank())

5.3 “分箱均值气泡图”

scatter_plot <- function(input1, piece) {
  input <- data[[input1]]
  m <- max(input, na.rm = TRUE)
  n <- min(input, na.rm = TRUE)
  output <- NULL
  for (i in ceiling(piece*n/m):(piece-1)) {
    sel <- (m*i/piece < input & input < m*(i+1)/piece)
    y_rate <- mean(data[sel, 'death'], na.rm = TRUE)
    number <- sum(sel, na.rm = TRUE)
    a <- cbind((i/piece)*m + m/(2*piece), y_rate, number)
    output <- rbind(output, a)
  }
  output <- as.data.frame(output)
  colnames(output) <- c(input1,'death','number')
  ggplot(output, aes(x = .data[[input1]], y = death, size = number, color = number)) +
    scale_color_gradient2(low = "#FFF7EC", mid = "#FDAE6B", high = "#E6550D") +
    geom_point(alpha=.75) +
    labs(x = input1, y = "Mean Death in Bin") +
    theme_minimal(base_size = 11) +
    theme(legend.position = "bottom") +
    scale_size_continuous(range = c(0.8, 6))
}
scatter_plot('temp', 200)

6 crossbasis 与模型

示例与说明:下面给出多种 fun 的写法示例。为了避免在教学时被“参数不匹配”打断,示例块不执行;真正用于估计的模型见下一节。

# B 样条(bs):degree=2,温度在 20°C 处设节点
cb.temp <- crossbasis(data$temp, lag=10,
                      argvar = list(fun="bs", degree=2, knots = c(20)),
                      arglag = list(fun="lin"))

# 自然样条(ns):knots 与 bs 类似,注意 ns 通常用 df 或 knots 二选一
cb.temp <- crossbasis(data$temp, lag=10,
                      argvar = list(fun="ns", knots = c(20)),
                      arglag = list(fun="lin"))

# 多项式(poly):指定最高阶
cb.temp <- crossbasis(data$temp, lag=10,
                      argvar = list(fun="poly", degree=2),
                      arglag = list(fun="lin"))

# 阈值函数(thr):单/双侧阈值
cb.temp <- crossbasis(data$temp, lag=10,
                      argvar = list(fun="thr", thr.value = c(19,23), side = 'd'),
                      arglag = list(fun="lin"))

# strata(地层函数):在断点间取常数
cb.temp <- crossbasis(data$temp, lag=10,
                      argvar = list(fun="strata", breaks = c(19,23)),
                      arglag = list(fun="lin"))

这里采用:暴露维 ns(knots=20)、滞后维 poly(degree=4),最大滞后 30 天;季节项 ns(time, 10*1) 与原文一致。

cb.temp <- crossbasis(data$temp, lag = 30,
                      argvar = list(fun = "ns", knots = c(20)),
                      arglag = list(fun = "poly", degree = 4))

model1 <- glm(death ~ cb.temp + ns(time, 10*1),
              family = quasipoisson(), data = data)

summary(model1)$coefficients[1:5,]
##                 Estimate Std. Error    t value    Pr(>|t|)
## (Intercept)   5.08496059 0.01991519 255.330758 0.000000000
## cb.tempv1.l1 -0.04603437 0.01469162  -3.133377 0.001737945
## cb.tempv1.l2 -0.28103045 0.22688194  -1.238664 0.215527445
## cb.tempv1.l3  1.65911443 0.93462652   1.775163 0.075931040
## cb.tempv1.l4 -2.48018906 1.41094128  -1.757826 0.078837567

6.1 2D 等高线

pred1.temp <- crosspred(cb.temp, model1,
                        cen = round(median(data$temp)), bylag = 0.2)

pal2 <- colorRampPalette(c("#F7FBFF","#DEEBF7","#9ECAE1","#6BAED6",
                           "#3182BD","#08519C","#08306B"))(200)

plot(pred1.temp, "contour",
     xlab="MeanTemp (°C)", ylab="Lag (days)",
     key.title = title("RR"), 
     plot.axes = { axis(1); axis(2) }, key.axes = axis(4))

6.2 3D 曲面

plot(pred1.temp, ticktype='detailed',
     border = "#2B8CBE", col = "#B2DF8A",
     xlab="MeanTemp (°C)", ylab="Lag (days)", zlab="RR",
     shade = 0.15, lwd = 1, theta = 35, phi = 20, ltheta = -40)

6.3 累积效应曲线(1–3/1–7/1–30 天)

op <- par(mfrow=c(1,3), mar=c(4,4,2,1))
for (w in list(c(1,3), c(1,7), c(1,30))) {
  cr <- crossreduce(cb.temp, model1, cen=20, type="overall", lag=w)
  plot(cr, xlab="Meantemp (°C)", ylab="RR",
       col="#2B8CBE", lwd=2)
  abline(h=1, lty=3, col="#777777")
  mtext(paste0("Overall cumulative ", w[1], "-", w[2], " days"), cex=.9)
}

par(op)

7 读图要点

  • 高温效应短、低温效应长:看 2D/3D 中高温随滞后快速衰减、低温衰减更慢。
  • 中心点(cen)决定“相对”风险:报告时注明选择(中位数/最舒适温度)。
  • 结果随结点/lag 微调而变:保留灵敏度分析(结点位置、df/年、lagmax)。
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Asia/Shanghai
## tzcode source: internal
## 
## attached base packages:
## [1] splines   stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] reshape2_1.4.4 mice_3.18.0    ggplot2_4.0.0  dlnm_2.4.10   
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.10        generics_0.1.4     tidyr_1.3.1        shape_1.4.6.1     
##  [5] stringi_1.8.7      lattice_0.22-7     lme4_1.1-37        digest_0.6.37     
##  [9] magrittr_2.0.4     mitml_0.4-5        evaluate_1.0.5     grid_4.4.2        
## [13] RColorBrewer_1.1-3 iterators_1.0.14   fastmap_1.2.0      plyr_1.8.9        
## [17] foreach_1.5.2      jomo_2.7-6         jsonlite_2.0.0     Matrix_1.7-4      
## [21] glmnet_4.1-10      nnet_7.3-20        backports_1.5.0    survival_3.8-3    
## [25] mgcv_1.9-3         purrr_1.1.0        scales_1.4.0       codetools_0.2-20  
## [29] jquerylib_0.1.4    Rdpack_2.6.4       reformulas_0.4.1   cli_3.6.5         
## [33] rlang_1.1.6        rbibutils_2.3      withr_3.0.2        cachem_1.1.0      
## [37] yaml_2.3.10        pan_1.9            tools_4.4.2        nloptr_2.2.1      
## [41] minqa_1.2.8        dplyr_1.1.4        boot_1.3-32        tsModel_0.6-2     
## [45] broom_1.0.10       rpart_4.1.24       vctrs_0.6.5        R6_2.6.1          
## [49] lifecycle_1.0.4    stringr_1.5.2      MASS_7.3-65        pkgconfig_2.0.3   
## [53] pillar_1.11.1      bslib_0.9.0        gtable_0.3.6       glue_1.8.0        
## [57] Rcpp_1.1.0         xfun_0.53          tibble_3.3.0       tidyselect_1.2.1  
## [61] rstudioapi_0.17.1  knitr_1.50         farver_2.1.2       htmltools_0.5.8.1 
## [65] nlme_3.1-168       labeling_0.4.3     rmarkdown_2.29     compiler_4.4.2    
## [69] S7_0.2.0