生存曲線: Non-parametric vs Parametric models

References


## 生存曲線: Non-parametric vs Parametric models
## 参考: http://stackoverflow.com/questions/9151591/how-to-plot-the-survival-curve-generated-by-survreg-package-survival-of-r


## 使い方
## この記載をすべてコピーして手元のRのエディタに貼付ける。
## Windows版では左上 [ファイル] → [新しいスクリプト]でエディタ。
## Mac版では真っ白い紙の形のアイコンでエディタ。
## 実行する行、あるいは範囲を選択して
## WindowsであればCtrl+R、MacであればCommand+Returnで実行。


## あとで使うinst()関数を定義。パッケージがなければインストールする関数。
## ここはあまり気にしないでいいです。

inst <- function(PKG) {
    if(!(PKG %in% rownames(installed.packages()))) {
        install.packages(pkgs=PKG, dependencies=TRUE)
    }
}


## Regression Modeling Strategies (rms)パッケージをインストール
inst("rms")

## 必要なパッケージを読み込み
library(survival)
library(rms)


## Surv objectを作成
data(lung, package = "survival")
s <- with(lung, Surv(time, status))


## 群分けしない場合のグラフ
## Kaplan-Meier estimator
## よくあるカプランマイアー曲線です
km.null <- survfit(data = lung, s ~ 1)
survplot(km.null, conf = "none")

## Cox regression by Efron method
## Cox regressionから生存曲線を出します。ほぼ同じになります。
cox.null <- coxph(data = lung, s ~ 1)
lines(survfit(cox.null), col = "green", mark.time = FALSE)

## Parametric with Weibull distribution
## Weibull分布を仮定して生存曲線を引きます。滑らかな線が引けます。
weibull.null <- survreg(data = lung, s ~ 1, dist = "weibull")
lines(x = predict(weibull.null, type = "quantile", p = seq(0.01, 0.99, by=.01))[1,],
      y = rev(seq(0.01, 0.99, by = 0.01)),
      col = "red")

## Parametric with log-logistic distribution
## 同じことですが、log-logistic分布という別の分布を仮定して行います。
loglogistic.null <- survreg(data = lung, s ~ 1, dist = "loglogistic")
lines(x = predict(loglogistic.null, type = "quantile", p = seq(0.01, 0.99, by=.01))[1,],
      y = rev(seq(0.01, 0.99, by = 0.01)),
      col = "blue")

legend(x = "topright",
       legend = c("Kaplan-Meier", "Cox (Efron)", "Weibull", "Log-logistic"),
       lwd = 2, bty = "n",
       col = c("black", "green", "red", "blue"))

plot of chunk unnamed-chunk-2



## 性別で群分けした場合のグラフを同様に重ね書きします。

## Kaplan-Meier estimator
km.sex <- survfit(data = lung, s ~ sex)
survplot(km.sex, conf = "none")         # rmsパッケージのsurvplot()を使っています。

## Cox regression by Efron method
cox.sex <- coxph(data = lung, s ~ sex)
for (X in 1:2) {
    lines(survfit(cox.sex, newdata = data.frame(sex = X)), col = "green", lty = X, mark.time = FALSE)
}

## Parametric with Weibull distribution
weibull.sex <- survreg(data = lung, s ~ sex, dist = "weibull")

for (X in 1:2) {
    lines(x = predict(weibull.sex, newdata = data.frame(sex = X), type = "quantile", p = seq(0.01, 0.99, by=.01)),
          y = rev(seq(0.01, 0.99, by = 0.01)),
          col = "red", lty = X)
}

## Parametric with log-logistic distribution
loglogistic.sex <- survreg(data = lung, s ~ sex, dist = "loglogistic")

for (X in 1:2) {
    lines(x = predict(loglogistic.sex, newdata = data.frame(sex = X), type = "quantile", p = seq(0.01, 0.99, by=.01)),
          y = rev(seq(0.01, 0.99, by = 0.01)),
          col = "blue", lty = X)
}

legend(x = "topright",
       legend = c("Kaplan-Meier", "Cox (Efron)", "Weibull", "Log-logistic"),
       lwd = 2, bty = "n",
       col = c("black", "green", "red", "blue"))

plot of chunk unnamed-chunk-2