## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:purrr':
##
## keep
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
Rによる生存時間解析 下記サイトを元に勉強
https://zenn.dev/masahiro_kondo/articles/a71d9cbba294fa
#変数の作成
th1 <- prostate %>%
mutate(rx_c = case_when(
rx == "placebo" ~ 0,
rx == "0.2 mg estrogen" ~ 1,
rx == "1.0 mg estrogen" ~ 2,
rx == "5.0 mg estrogen" ~ 3
)) %>%
mutate(rx_c2 = case_when(
rx == "placebo" ~ 0,
rx == "0.2 mg estrogen" ~ 1,
rx == "1.0 mg estrogen" ~ 1,
rx == "5.0 mg estrogen" ~ 1
)) %>%
mutate(status2 = case_when(
status == "alive" ~ 0,
status == "dead - prostatic ca" ~ 1,
status %in% c("dead - heart or vascular","dead - cerebrovascular") ~ 2,
TRUE ~ 3
)) %>%
mutate(status3 =
ifelse(status2==0,0,
ifelse(status2==2,1,0))) %>%
mutate(status4 =
ifelse(status2==0,0,
ifelse(status2==3,1,0))) %>%
mutate(status5 =
ifelse(status2==1,1,0)) %>%
mutate(stage_c=as.factor(stage)) %>%
mutate(dtime_y=dtime/12)
th2 <- th1 %>% dplyr::select(rx_c,age,stage_c,status,dtime_y,status5)
table1 <- tbl_summary(
th2,
by = rx_c,
missing = "ifany",
type = list(c(age, dtime_y) ~ "continuous")) %>%
add_n() %>%
modify_header(label = "**Variable**")
table1
| Variable | N | 0, N = 1271 | 1, N = 1241 | 2, N = 1261 | 3, N = 1251 |
|---|---|---|---|---|---|
| age | 501 | 73 (70, 76) | 73 (70, 75) | 73 (69, 76) | 73 (69, 76) |
| Unknown | 0 | 0 | 1 | 0 | |
| stage_c | 502 | ||||
| 3 | 74 (58%) | 73 (59%) | 71 (56%) | 71 (57%) | |
| 4 | 53 (42%) | 51 (41%) | 55 (44%) | 54 (43%) | |
| status | 502 | ||||
| alive | 32 (25%) | 29 (23%) | 55 (44%) | 32 (26%) | |
| dead - cerebrovascular | 7 (5.5%) | 6 (4.8%) | 11 (8.7%) | 7 (5.6%) | |
| dead - heart or vascular | 27 (21%) | 19 (15%) | 14 (11%) | 36 (29%) | |
| dead - other ca | 3 (2.4%) | 9 (7.3%) | 9 (7.1%) | 4 (3.2%) | |
| dead - other specific non-ca | 10 (7.9%) | 9 (7.3%) | 4 (3.2%) | 5 (4.0%) | |
| dead - prostatic ca | 37 (29%) | 42 (34%) | 24 (19%) | 27 (22%) | |
| dead - pulmonary embolus | 2 (1.6%) | 2 (1.6%) | 3 (2.4%) | 7 (5.6%) | |
| dead - respiratory disease | 6 (4.7%) | 4 (3.2%) | 4 (3.2%) | 2 (1.6%) | |
| dead - unknown cause | 1 (0.8%) | 3 (2.4%) | 1 (0.8%) | 2 (1.6%) | |
| dead - unspecified non-ca | 2 (1.6%) | 1 (0.8%) | 1 (0.8%) | 3 (2.4%) | |
| dtime_y | 502 | 2.75 (1.33, 4.83) | 2.50 (1.15, 4.77) | 3.46 (1.33, 4.92) | 2.92 (1.00, 4.50) |
| status5 | 502 | 37 (29%) | 42 (34%) | 24 (19%) | 27 (22%) |
| 1 Median (IQR); n (%) | |||||
#KM曲線
fit <- survfit( Surv(dtime_y, status5) ~ rx_c2, data = th1 )
ggsurvplot(
fit,
data = th1,
risk.table = TRUE,
pval = TRUE,
conf.int = TRUE,
xlab = "YEAR",
ggtheme = theme_light(),
risk.table.y.text.col = T,
risk.table.y.text = FALSE,
legend.labs =
c("Placebo", "Estrogen")
)
生存曲線がクロスする理由 1)奏効する部分集団と奏効しない部分集団が混在 2)途中脱落が多く,後半の差は精度不足で偶然で生じた.(時間軸の限定が必要) 3)OSで治療のクロスオーバーが大量に起きた. 4)mild(延命)な治療とintensive(治癒)な治療の比較.
引用 https://www.croit.com/wp_croit2/wp-content/uploads/2018/11/16_01.pdf
1)の例がわかりやすい
2)は、リスクテーブルから判断すべきか 3はよく分からない 4はそもそも比較すべきでないってことか
fit2 <- survfit( Surv(dtime_y, status5) ~ rx_c, data = th1 )
ggsurvplot(
fit2,
data = th1,
risk.table = TRUE,
pval = TRUE,
conf.int = FALSE,
xlab = "YEAR",
ggtheme = theme_light(),
risk.table.y.text.col = T,
risk.table.y.text = FALSE,
legend.labs =
c("Placebo", "0.2 mg estrogen", "1.0 mg estrogen", "5.0 mg estrogen")
)
## 生存時間分析
survfit(Surv(dtime_y, status5) ~ rx_c2, data = th1)
## Call: survfit(formula = Surv(dtime_y, status5) ~ rx_c2, data = th1)
##
## n events median 0.95LCL 0.95UCL
## rx_c2=0 127 37 5.92 5.75 NA
## rx_c2=1 375 93 NA NA NA
何で信頼区間とメジアンが並んでいるのだろう? LCLとかUCLと書いてるが、信頼区間ではないと思う。(それとも、これも信頼区間と言うのか?) 0.95分位点じゃないの?? →違った。中央値の信頼区間(U検定を実施してると思う)
quantile(fit, c(1-0.95,1-0.5,1-0.05))
## $quantile
## 5 50 95
## rx_c2=0 0.5833333 5.916667 NA
## rx_c2=1 0.7500000 NA NA
##
## $lower
## 5 50 95
## rx_c2=0 0.1666667 5.75 NA
## rx_c2=1 0.5000000 NA NA
##
## $upper
## 5 50 95
## rx_c2=0 1.5 NA NA
## rx_c2=1 1.0 NA NA
どんな値でも信頼区間って出せるんだね、、、
#Survival probability at a certain time
summary(fit,times = 5)
## Call: survfit(formula = Surv(dtime_y, status5) ~ rx_c2, data = th1)
##
## rx_c2=0
## time n.risk n.event survival std.err lower 95% CI
## 5.000 28.000 35.000 0.626 0.052 0.532
## upper 95% CI
## 0.737
##
## rx_c2=1
## time n.risk n.event survival std.err lower 95% CI
## 5.0000 81.0000 89.0000 0.6638 0.0311 0.6055
## upper 95% CI
## 0.7278