crossbasis()
把温度×滞后变成一组新变量,然后在 (类)Poisson 的 GLM
框架中拟合,最后用 crosspred() 与
crossreduce() 还原为直观图形。crossbasis 理论与实现;之后扩展到惩罚样条、降维合并与 meta
分析等。\[ Y_t \sim \text{Poisson/Quasi-Poisson}(\mu_t), \quad \log \mu_t = \alpha + \underbrace{cb(X_t,\ldots,X_{t-L})}_{\text{cross-basis}} + s(\text{time}) + \text{dow} + \cdots \]
其中 cb() 由 crossbasis()
产生,s(time) 常用样条控制季节/长期趋势。
library(dlnm); library(ggplot2); library(mice); library(reshape2); library(splines)
# 调用芝加哥数据(来自 dlnm 包)
data("chicagoNMMAPS")
data <- as.data.frame(chicagoNMMAPS)
# —— 数据预处理 ——
data[data$death > 200, 'death'] <- NA # 离群值 -> NA
data[data$cvd > 100, 'cvd'] <- NA
data[data$resp > 200, 'resp'] <- NA
# 生成两日间温差序列
data$temp_dif <- c(NA, diff(data$temp))
# mice 插补
miceMod <- mice(data, printFlag = FALSE)
data <- complete(miceMod)
# 进一步把高值 pm10 标为 NA 并再次插补
data[data$pm10 > 150, 'pm10'] <- NA
miceMod <- mice(data, printFlag = FALSE)
data <- complete(miceMod)
plot_varible <- c("death", "cvd", "resp", "temp", "temp_dif", "pm10", "o3")
plot_data <- melt(data, id=c('date'), measure.vars = plot_varible)
plot_data$variable <- factor(
plot_data$variable,
levels = c("death","cvd","resp","temp","temp_dif","pm10","o3"),
labels = c('死亡人数','心血管疾病致死','呼吸道疾病致死','平均温度','两天间温度差',
expression(paste(PM[10], " (", mu, "g/", m^3, ")")),
expression(paste(O[3], " (", mu, "g/", m^3, ")")))
)
# 统一色板(7色),配合 facet 独立 y 轴
cols7 <- c("#44546A","#1F77B4","#2CA02C","#D62728","#9467BD","#8C564B","#17BECF")
ggplot(plot_data, aes(x = as.factor(date), y = value, color = variable, fill = variable)) +
geom_point(alpha=.45, shape=16, size=.7, stroke=0) +
scale_color_manual(values = cols7) +
scale_fill_manual(values = cols7) +
scale_y_continuous(name = NULL) +
scale_x_discrete(name = "Time",
breaks = c('1988-01-01','1990-01-01','1992-01-01','1994-01-01',
'1996-01-01','1998-01-01','2000-01-01')) +
theme_minimal(base_size = 10) +
theme(legend.position = "none",
panel.grid.minor = element_blank(),
strip.text = element_text(face="bold"),
axis.text.x = element_text(angle = 0, vjust=.9, hjust=.9)) +
facet_wrap(~variable, ncol=1, scales='free', labeller = label_parsed)
ggplot(data, aes(x = temp, y = death)) +
geom_point(alpha=.35, size=1.1, color="#2B8CBE") +
labs(x = "Temperature (°C)", y = "Death") +
theme_minimal(base_size = 11) +
theme(panel.grid.minor = element_blank())
scatter_plot <- function(input1, piece) {
input <- data[[input1]]
m <- max(input, na.rm = TRUE)
n <- min(input, na.rm = TRUE)
output <- NULL
for (i in ceiling(piece*n/m):(piece-1)) {
sel <- (m*i/piece < input & input < m*(i+1)/piece)
y_rate <- mean(data[sel, 'death'], na.rm = TRUE)
number <- sum(sel, na.rm = TRUE)
a <- cbind((i/piece)*m + m/(2*piece), y_rate, number)
output <- rbind(output, a)
}
output <- as.data.frame(output)
colnames(output) <- c(input1,'death','number')
ggplot(output, aes(x = .data[[input1]], y = death, size = number, color = number)) +
scale_color_gradient2(low = "#FFF7EC", mid = "#FDAE6B", high = "#E6550D") +
geom_point(alpha=.75) +
labs(x = input1, y = "Mean Death in Bin") +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom") +
scale_size_continuous(range = c(0.8, 6))
}
scatter_plot('temp', 200)
示例与说明:下面给出多种
fun的写法示例。为了避免在教学时被“参数不匹配”打断,示例块不执行;真正用于估计的模型见下一节。
# B 样条(bs):degree=2,温度在 20°C 处设节点
cb.temp <- crossbasis(data$temp, lag=10,
argvar = list(fun="bs", degree=2, knots = c(20)),
arglag = list(fun="lin"))
# 自然样条(ns):knots 与 bs 类似,注意 ns 通常用 df 或 knots 二选一
cb.temp <- crossbasis(data$temp, lag=10,
argvar = list(fun="ns", knots = c(20)),
arglag = list(fun="lin"))
# 多项式(poly):指定最高阶
cb.temp <- crossbasis(data$temp, lag=10,
argvar = list(fun="poly", degree=2),
arglag = list(fun="lin"))
# 阈值函数(thr):单/双侧阈值
cb.temp <- crossbasis(data$temp, lag=10,
argvar = list(fun="thr", thr.value = c(19,23), side = 'd'),
arglag = list(fun="lin"))
# strata(地层函数):在断点间取常数
cb.temp <- crossbasis(data$temp, lag=10,
argvar = list(fun="strata", breaks = c(19,23)),
arglag = list(fun="lin"))
这里采用:暴露维
ns(knots=20)、滞后维poly(degree=4),最大滞后 30 天;季节项ns(time, 10*1)与原文一致。
cb.temp <- crossbasis(data$temp, lag = 30,
argvar = list(fun = "ns", knots = c(20)),
arglag = list(fun = "poly", degree = 4))
model1 <- glm(death ~ cb.temp + ns(time, 10*1),
family = quasipoisson(), data = data)
summary(model1)$coefficients[1:5,]
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.08496059 0.01991519 255.330758 0.000000000
## cb.tempv1.l1 -0.04603437 0.01469162 -3.133377 0.001737945
## cb.tempv1.l2 -0.28103045 0.22688194 -1.238664 0.215527445
## cb.tempv1.l3 1.65911443 0.93462652 1.775163 0.075931040
## cb.tempv1.l4 -2.48018906 1.41094128 -1.757826 0.078837567
pred1.temp <- crosspred(cb.temp, model1,
cen = round(median(data$temp)), bylag = 0.2)
pal2 <- colorRampPalette(c("#F7FBFF","#DEEBF7","#9ECAE1","#6BAED6",
"#3182BD","#08519C","#08306B"))(200)
plot(pred1.temp, "contour",
xlab="MeanTemp (°C)", ylab="Lag (days)",
key.title = title("RR"),
plot.axes = { axis(1); axis(2) }, key.axes = axis(4))
plot(pred1.temp, ticktype='detailed',
border = "#2B8CBE", col = "#B2DF8A",
xlab="MeanTemp (°C)", ylab="Lag (days)", zlab="RR",
shade = 0.15, lwd = 1, theta = 35, phi = 20, ltheta = -40)
op <- par(mfrow=c(1,3), mar=c(4,4,2,1))
for (w in list(c(1,3), c(1,7), c(1,30))) {
cr <- crossreduce(cb.temp, model1, cen=20, type="overall", lag=w)
plot(cr, xlab="Meantemp (°C)", ylab="RR",
col="#2B8CBE", lwd=2)
abline(h=1, lty=3, col="#777777")
mtext(paste0("Overall cumulative ", w[1], "-", w[2], " days"), cex=.9)
}
par(op)
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Shanghai
## tzcode source: internal
##
## attached base packages:
## [1] splines stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] reshape2_1.4.4 mice_3.18.0 ggplot2_4.0.0 dlnm_2.4.10
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 tidyr_1.3.1 shape_1.4.6.1
## [5] stringi_1.8.7 lattice_0.22-7 lme4_1.1-37 digest_0.6.37
## [9] magrittr_2.0.4 mitml_0.4-5 evaluate_1.0.5 grid_4.4.2
## [13] RColorBrewer_1.1-3 iterators_1.0.14 fastmap_1.2.0 plyr_1.8.9
## [17] foreach_1.5.2 jomo_2.7-6 jsonlite_2.0.0 Matrix_1.7-4
## [21] glmnet_4.1-10 nnet_7.3-20 backports_1.5.0 survival_3.8-3
## [25] mgcv_1.9-3 purrr_1.1.0 scales_1.4.0 codetools_0.2-20
## [29] jquerylib_0.1.4 Rdpack_2.6.4 reformulas_0.4.1 cli_3.6.5
## [33] rlang_1.1.6 rbibutils_2.3 withr_3.0.2 cachem_1.1.0
## [37] yaml_2.3.10 pan_1.9 tools_4.4.2 nloptr_2.2.1
## [41] minqa_1.2.8 dplyr_1.1.4 boot_1.3-32 tsModel_0.6-2
## [45] broom_1.0.10 rpart_4.1.24 vctrs_0.6.5 R6_2.6.1
## [49] lifecycle_1.0.4 stringr_1.5.2 MASS_7.3-65 pkgconfig_2.0.3
## [53] pillar_1.11.1 bslib_0.9.0 gtable_0.3.6 glue_1.8.0
## [57] Rcpp_1.1.0 xfun_0.53 tibble_3.3.0 tidyselect_1.2.1
## [61] rstudioapi_0.17.1 knitr_1.50 farver_2.1.2 htmltools_0.5.8.1
## [65] nlme_3.1-168 labeling_0.4.3 rmarkdown_2.29 compiler_4.4.2
## [69] S7_0.2.0