Code for Survival Analysis

For a basic survival function, we can use the Cox Proportional Hazard method, doing so with the survival package.

# Installing package
#install.packages("survival")

# Loading package
library(survival)
## Warning: package 'survival' was built under R version 4.1.3
library(ggplot2)
library(survminer)
## Warning: package 'survminer' was built under R version 4.1.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.1.3
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(ggpubr)
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.1.3

Reading in the data:

# Read in our data
turnover <- read.csv("turnover.csv", header = TRUE)
turnover$gender<-as.factor(turnover$gender)
str(turnover)
## 'data.frame':    1129 obs. of  16 variables:
##  $ stag        : num  7.03 22.97 15.93 15.93 8.41 ...
##  $ event       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ gender      : Factor w/ 2 levels "f","m": 2 2 1 1 2 1 1 1 1 1 ...
##  $ age         : num  35 33 35 35 32 42 42 28 29 30 ...
##  $ industry    : chr  "Banks" "Banks" "PowerGeneration" "PowerGeneration" ...
##  $ profession  : chr  "HR" "HR" "HR" "HR" ...
##  $ traffic     : chr  "rabrecNErab" "empjs" "rabrecNErab" "rabrecNErab" ...
##  $ coach       : chr  "no" "no" "no" "no" ...
##  $ head_gender : chr  "f" "m" "m" "m" ...
##  $ greywage    : chr  "white" "white" "white" "white" ...
##  $ way         : chr  "bus" "bus" "bus" "bus" ...
##  $ extraversion: num  6.2 6.2 6.2 5.4 3 6.2 6.2 3.8 8.6 5.4 ...
##  $ independ    : num  4.1 4.1 6.2 7.6 4.1 6.2 6.2 5.5 6.9 5.5 ...
##  $ selfcontrol : num  5.7 5.7 2.6 4.9 8 4.1 4.1 8 2.6 3.3 ...
##  $ anxiety     : num  7.1 7.1 4.8 2.5 7.1 5.6 5.6 4 4 7.9 ...
##  $ novator     : num  8.3 8.3 8.3 6.7 3.7 6.7 6.7 4.4 7.5 8.3 ...
cox1<-coxph(Surv(stag, event) ~ gender, data = turnover)
cox2<-coxph(Surv(stag, event) ~ age, data = turnover)
cox3<-coxph(Surv(stag, event) ~ profession, data = turnover)

# Summarizing the model
summary(cox1)
## Call:
## coxph(formula = Surv(stag, event) ~ gender, data = turnover)
## 
##   n= 1129, number of events= 571 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)
## genderm -0.15133   0.85956  0.09898 -1.529    0.126
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## genderm    0.8596      1.163     0.708     1.044
## 
## Concordance= 0.516  (se = 0.01 )
## Likelihood ratio test= 2.4  on 1 df,   p=0.1
## Wald test            = 2.34  on 1 df,   p=0.1
## Score (logrank) test = 2.34  on 1 df,   p=0.1
# Fitting survfit()
multi_cox <- survfit(cox1)


# Plotting the function
plot(multi_cox)

plot(survfit(cox2))

plot(survfit((cox3)))

summary(cox1)
## Call:
## coxph(formula = Surv(stag, event) ~ gender, data = turnover)
## 
##   n= 1129, number of events= 571 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)
## genderm -0.15133   0.85956  0.09898 -1.529    0.126
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## genderm    0.8596      1.163     0.708     1.044
## 
## Concordance= 0.516  (se = 0.01 )
## Likelihood ratio test= 2.4  on 1 df,   p=0.1
## Wald test            = 2.34  on 1 df,   p=0.1
## Score (logrank) test = 2.34  on 1 df,   p=0.1

This is going to look different once we get a handle on how to order variables. This doesn’t work right now because there is way too much variance.

Now we’ll show basic Kaplan-Meier method

model<-survfit(Surv(turnover$stag, turnover$event==1)~1)

model
## Call: survfit(formula = Surv(turnover$stag, turnover$event == 1) ~ 
##     1)
## 
##         n events median 0.95LCL 0.95UCL
## [1,] 1129    571   50.7    45.6      54
plot(model)