#Install the survival package
library(survival)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
#Load the cancer dataset
head(cancer)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 3 306 2 74 1 1 90 100 1175 NA
## 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 1010 1 56 1 0 90 90 NA 15
## 4 5 210 2 57 1 1 90 60 1150 11
## 5 1 883 2 60 1 0 100 90 NA 0
## 6 12 1022 1 74 1 1 50 80 513 0
tail(cancer)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 223 1 116 2 76 1 1 80 80 NA 0
## 224 1 188 1 77 1 1 80 60 NA 3
## 225 13 191 1 39 1 0 90 90 2350 -5
## 226 32 105 1 75 2 2 60 70 1025 5
## 227 6 174 1 66 1 1 90 100 1075 1
## 228 22 177 1 58 2 1 80 90 1060 0
summary(cancer)
## inst time status age
## Min. : 1.00 Min. : 5.0 Min. :1.000 Min. :39.00
## 1st Qu.: 3.00 1st Qu.: 166.8 1st Qu.:1.000 1st Qu.:56.00
## Median :11.00 Median : 255.5 Median :2.000 Median :63.00
## Mean :11.09 Mean : 305.2 Mean :1.724 Mean :62.45
## 3rd Qu.:16.00 3rd Qu.: 396.5 3rd Qu.:2.000 3rd Qu.:69.00
## Max. :33.00 Max. :1022.0 Max. :2.000 Max. :82.00
## NA's :1
## sex ph.ecog ph.karno pat.karno
## Min. :1.000 Min. :0.0000 Min. : 50.00 Min. : 30.00
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.: 75.00 1st Qu.: 70.00
## Median :1.000 Median :1.0000 Median : 80.00 Median : 80.00
## Mean :1.395 Mean :0.9515 Mean : 81.94 Mean : 79.96
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.: 90.00 3rd Qu.: 90.00
## Max. :2.000 Max. :3.0000 Max. :100.00 Max. :100.00
## NA's :1 NA's :1 NA's :3
## meal.cal wt.loss
## Min. : 96.0 Min. :-24.000
## 1st Qu.: 635.0 1st Qu.: 0.000
## Median : 975.0 Median : 7.000
## Mean : 928.8 Mean : 9.832
## 3rd Qu.:1150.0 3rd Qu.: 15.750
## Max. :2600.0 Max. : 68.000
## NA's :47 NA's :14
#Compute the mean, median of the survival times,irrespective of the censoring of survival times
mean(cancer$time)
## [1] 305.2325
median(cancer$time)
## [1] 255.5
#Import the Estriol dataset
estriol <- read.csv2('C:/Users/thuyb/OneDrive - Universiteit Antwerpen/University of Antwerp/Semester 1/Data Management/Practice/Practical session/Rpractical session1_student_files/Estriol.csv')
head(estriol)
## NR ESTRIOL BIRTHWGT
## 1 1 7 25
## 2 2 9 25
## 3 3 9 25
## 4 4 12 27
## 5 5 14 27
## 6 6 16 27
hist(estriol$ESTRIOL,
xlab = "Estriol level", ylab = "Relative frequency",
freq = F, main = "Histogram of estriol lever")
data(aml)
## Warning in data(aml): data set 'aml' not found
#Calculate descriptive measures (mean, median, variance, standard deviation, quartiles and range) for the variable `time’
summary(aml$time)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 12.50 23.00 29.48 33.50 161.00
par(mfrow = c(1,2))
hist(aml$time, main = "", xlab = "Time", freq = FALSE)
boxplot(aml$time, ylab = "Time")
How many observations are right-censored (calculate percentage).
table(aml$status)
##
## 0 1
## 5 18
sum(aml$status==0)/length(aml$status)
## [1] 0.2173913
Calculate the descriptive measure for all uncensored times (i.e. cens = 1) and seperately for treatment and control groups.
table(aml$status, aml$x)
##
## Maintained Nonmaintained
## 0 4 1
## 1 7 11
aml_1 <- aml %>%
rename(group=x) %>%
filter(status==1)
tapply(aml_1$time,aml_1$group, mean)
## Maintained Nonmaintained
## 25.14286 21.72727
tapply(aml_1$time,aml_1$group, median)
## Maintained Nonmaintained
## 23 23
tapply(aml_1$time,aml_1$group, var)
## Maintained Nonmaintained
## 183.1429 225.0182