## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

## Loading required package: ggpubr

## 
## Attaching package: 'survminer'

## The following object is masked from 'package:survival':
## 
##     myeloma

## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.

##

## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.

## 
## Attaching package: 'gdata'

## The following objects are masked from 'package:dplyr':
## 
##     combine, first, last

## The following object is masked from 'package:purrr':
## 
##     keep

## The following object is masked from 'package:stats':
## 
##     nobs

## The following object is masked from 'package:utils':
## 
##     object.size

## The following object is masked from 'package:base':
## 
##     startsWith

Rによる生存時間解析下記サイトを元に勉強

https://zenn.dev/masahiro_kondo/articles/a71d9cbba294fa

#変数の作成
th1 <- prostate %>% 
  mutate(rx_c = case_when(
    rx == "placebo" ~ 0,
    rx == "0.2 mg estrogen" ~ 1,
    rx == "1.0 mg estrogen" ~ 2,
    rx == "5.0 mg estrogen" ~ 3
  )) %>% 
  mutate(rx_c2 = case_when(
    rx == "placebo" ~ 0,
    rx == "0.2 mg estrogen" ~ 1,
    rx == "1.0 mg estrogen" ~ 1,
    rx == "5.0 mg estrogen" ~ 1
  )) %>% 
  mutate(status2 = case_when(
    status == "alive" ~ 0,
    status == "dead - prostatic ca" ~ 1,
    status %in% c("dead - heart or vascular","dead - cerebrovascular") ~ 2,
    TRUE ~ 3
  )) %>% 
  mutate(status3 =
           ifelse(status2==0,0,
                  ifelse(status2==2,1,0))) %>% 
  mutate(status4 = 
           ifelse(status2==0,0,
                  ifelse(status2==3,1,0))) %>% 
  mutate(status5 = 
           ifelse(status2==1,1,0)) %>% 
  mutate(stage_c=as.factor(stage)) %>% 
  mutate(dtime_y=dtime/12)

th2 <- th1 %>% dplyr::select(rx_c,age,stage_c,status,dtime_y,status5) 
  
table1 <- tbl_summary(
  th2,
  by = rx_c,
  missing = "ifany",
  type = list(c(age, dtime_y) ~ "continuous")) %>% 
  add_n() %>% 
  modify_header(label = "**Variable**")
table1

Variable	N	0, N = 127¹	1, N = 124¹	2, N = 126¹	3, N = 125¹
age	501	73 (70, 76)	73 (70, 75)	73 (69, 76)	73 (69, 76)
Unknown		0	0	1	0
stage_c	502
3		74 (58%)	73 (59%)	71 (56%)	71 (57%)
4		53 (42%)	51 (41%)	55 (44%)	54 (43%)
status	502
alive		32 (25%)	29 (23%)	55 (44%)	32 (26%)
dead - cerebrovascular		7 (5.5%)	6 (4.8%)	11 (8.7%)	7 (5.6%)
dead - heart or vascular		27 (21%)	19 (15%)	14 (11%)	36 (29%)
dead - other ca		3 (2.4%)	9 (7.3%)	9 (7.1%)	4 (3.2%)
dead - other specific non-ca		10 (7.9%)	9 (7.3%)	4 (3.2%)	5 (4.0%)
dead - prostatic ca		37 (29%)	42 (34%)	24 (19%)	27 (22%)
dead - pulmonary embolus		2 (1.6%)	2 (1.6%)	3 (2.4%)	7 (5.6%)
dead - respiratory disease		6 (4.7%)	4 (3.2%)	4 (3.2%)	2 (1.6%)
dead - unknown cause		1 (0.8%)	3 (2.4%)	1 (0.8%)	2 (1.6%)
dead - unspecified non-ca		2 (1.6%)	1 (0.8%)	1 (0.8%)	3 (2.4%)
dtime_y	502	2.75 (1.33, 4.83)	2.50 (1.15, 4.77)	3.46 (1.33, 4.92)	2.92 (1.00, 4.50)
status5	502	37 (29%)	42 (34%)	24 (19%)	27 (22%)
¹ Median (IQR); n (%)

プラセボ群と、介入群での比較

カプランマイヤー

#KM曲線
fit <- survfit( Surv(dtime_y, status5) ~ rx_c2, data = th1 )
ggsurvplot(
  fit,                     
  data = th1,             
  risk.table = TRUE,       
  pval = TRUE,             
  conf.int = TRUE,         
  xlab = "YEAR", 
  ggtheme = theme_light(), 
  risk.table.y.text.col = T, 
  risk.table.y.text = FALSE, 
  legend.labs =
   c("Placebo", "Estrogen")
)

生存曲線のクロスをどう見るべき？

生存曲線がクロスする理由 1）奏効する部分集団と奏効しない部分集団が混在 2）途中脱落が多く，後半の差は精度不足で偶然で生じた．（時間軸の限定が必要） 3）ＯＳで治療のクロスオーバーが大量に起きた． 4）ｍｉｌｄ(延命）な治療とｉｎｔｅｎｓｉｖｅ（治癒）な治療の比較.

引用 https://www.croit.com/wp_croit2/wp-content/uploads/2018/11/16_01.pdf

1）の例がわかりやすい 1の例

2）は、リスクテーブルから判断すべきか 3はよく分からない 4はそもそも比較すべきでないってことか

介入のクラス毎のカプランマイヤー

fit2 <- survfit( Surv(dtime_y, status5) ~ rx_c, data = th1 )

ggsurvplot(
  fit2,                     
  data = th1,             
  risk.table = TRUE,       
  pval = TRUE,             
  conf.int = FALSE,         
  xlab = "YEAR", 
  ggtheme = theme_light(), 
  risk.table.y.text.col = T, 
  risk.table.y.text = FALSE, 
  legend.labs =
    c("Placebo", "0.2 mg estrogen", "1.0 mg estrogen", "5.0 mg estrogen")
)

## 生存時間分析

survfit(Surv(dtime_y, status5) ~ rx_c2, data = th1)

## Call: survfit(formula = Surv(dtime_y, status5) ~ rx_c2, data = th1)
## 
##           n events median 0.95LCL 0.95UCL
## rx_c2=0 127     37   5.92    5.75      NA
## rx_c2=1 375     93     NA      NA      NA

何で信頼区間とメジアンが並んでいるのだろう？ LCLとかUCLと書いてるが、信頼区間ではないと思う。（それとも、これも信頼区間と言うのか？） 0.95分位点じゃないの？？ →違った。中央値の信頼区間（U検定を実施してると思う）

quantile(fit, c(1-0.95,1-0.5,1-0.05))

## $quantile
##                 5       50 95
## rx_c2=0 0.5833333 5.916667 NA
## rx_c2=1 0.7500000       NA NA
## 
## $lower
##                 5   50 95
## rx_c2=0 0.1666667 5.75 NA
## rx_c2=1 0.5000000   NA NA
## 
## $upper
##           5 50 95
## rx_c2=0 1.5 NA NA
## rx_c2=1 1.0 NA NA

5年生存の割合

どんな値でも信頼区間って出せるんだね、、、

#Survival probability at a certain time
summary(fit,times = 5)

## Call: survfit(formula = Surv(dtime_y, status5) ~ rx_c2, data = th1)
## 
##                 rx_c2=0 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##        5.000       28.000       35.000        0.626        0.052        0.532 
## upper 95% CI 
##        0.737 
## 
##                 rx_c2=1 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##       5.0000      81.0000      89.0000       0.6638       0.0311       0.6055 
## upper 95% CI 
##       0.7278

R Notebook

プラセボ群と、介入群での比較

カプランマイヤー

生存曲線のクロスをどう見るべき？

介入のクラス毎のカプランマイヤー

5年生存の割合

ログランク検定