For a basic survival function, we can use the Cox Proportional Hazard method, doing so with the survival package.
# Installing package
#install.packages("survival")
# Loading package
library(survival)
## Warning: package 'survival' was built under R version 4.1.3
library(ggplot2)
library(survminer)
## Warning: package 'survminer' was built under R version 4.1.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.1.3
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(ggpubr)
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.1.3
Reading in the data:
# Read in our data
turnover <- read.csv("turnover.csv", header = TRUE)
turnover$gender<-as.factor(turnover$gender)
str(turnover)
## 'data.frame': 1129 obs. of 16 variables:
## $ stag : num 7.03 22.97 15.93 15.93 8.41 ...
## $ event : int 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : Factor w/ 2 levels "f","m": 2 2 1 1 2 1 1 1 1 1 ...
## $ age : num 35 33 35 35 32 42 42 28 29 30 ...
## $ industry : chr "Banks" "Banks" "PowerGeneration" "PowerGeneration" ...
## $ profession : chr "HR" "HR" "HR" "HR" ...
## $ traffic : chr "rabrecNErab" "empjs" "rabrecNErab" "rabrecNErab" ...
## $ coach : chr "no" "no" "no" "no" ...
## $ head_gender : chr "f" "m" "m" "m" ...
## $ greywage : chr "white" "white" "white" "white" ...
## $ way : chr "bus" "bus" "bus" "bus" ...
## $ extraversion: num 6.2 6.2 6.2 5.4 3 6.2 6.2 3.8 8.6 5.4 ...
## $ independ : num 4.1 4.1 6.2 7.6 4.1 6.2 6.2 5.5 6.9 5.5 ...
## $ selfcontrol : num 5.7 5.7 2.6 4.9 8 4.1 4.1 8 2.6 3.3 ...
## $ anxiety : num 7.1 7.1 4.8 2.5 7.1 5.6 5.6 4 4 7.9 ...
## $ novator : num 8.3 8.3 8.3 6.7 3.7 6.7 6.7 4.4 7.5 8.3 ...
cox1<-coxph(Surv(stag, event) ~ gender, data = turnover)
cox2<-coxph(Surv(stag, event) ~ age, data = turnover)
cox3<-coxph(Surv(stag, event) ~ profession, data = turnover)
# Summarizing the model
summary(cox1)
## Call:
## coxph(formula = Surv(stag, event) ~ gender, data = turnover)
##
## n= 1129, number of events= 571
##
## coef exp(coef) se(coef) z Pr(>|z|)
## genderm -0.15133 0.85956 0.09898 -1.529 0.126
##
## exp(coef) exp(-coef) lower .95 upper .95
## genderm 0.8596 1.163 0.708 1.044
##
## Concordance= 0.516 (se = 0.01 )
## Likelihood ratio test= 2.4 on 1 df, p=0.1
## Wald test = 2.34 on 1 df, p=0.1
## Score (logrank) test = 2.34 on 1 df, p=0.1
# Fitting survfit()
multi_cox <- survfit(cox1)
# Plotting the function
plot(multi_cox)
plot(survfit(cox2))
plot(survfit((cox3)))
summary(cox1)
## Call:
## coxph(formula = Surv(stag, event) ~ gender, data = turnover)
##
## n= 1129, number of events= 571
##
## coef exp(coef) se(coef) z Pr(>|z|)
## genderm -0.15133 0.85956 0.09898 -1.529 0.126
##
## exp(coef) exp(-coef) lower .95 upper .95
## genderm 0.8596 1.163 0.708 1.044
##
## Concordance= 0.516 (se = 0.01 )
## Likelihood ratio test= 2.4 on 1 df, p=0.1
## Wald test = 2.34 on 1 df, p=0.1
## Score (logrank) test = 2.34 on 1 df, p=0.1
This is going to look different once we get a handle on how to order variables. This doesn’t work right now because there is way too much variance.
Now we’ll show basic Kaplan-Meier method
model<-survfit(Surv(turnover$stag, turnover$event==1)~1)
model
## Call: survfit(formula = Surv(turnover$stag, turnover$event == 1) ~
## 1)
##
## n events median 0.95LCL 0.95UCL
## [1,] 1129 571 50.7 45.6 54
plot(model)