library(tidyverse)
library(skimr)
library(survival)
library(timereg)
library(cdnbcr)
library(readxl)
# Reading
melanoma <- read_excel("C:/Users/rodri/Desktop/Simulation_CDNBCR/Applications/dados_melanoma_new.xls",
col_types = c("numeric", "numeric", "numeric", "text", "text", "text", "numeric",
"text", "numeric", "numeric", "text", "text", "numeric"))
# Transforming
melanoma <- melanoma %>% mutate(sexo = factor(sexo, labels = c("Male", "Female")),
idade_cat = factor(cut(idade, c(0, 40, 200)),
labels = c("<= 40", "> 40")),
escolari = factor(escolari,
labels = c("Illiterate", "Elementary school", "Elementary school",
"High school", "Higher education", "Not informed")),
EC_cat = factor(EC_cat, labels = c("I", "II", "III", "IV")),
quimio = factor(quimio, labels = c("No", "Yes")),
radio = factor(radio, labels = c("No", "Yes")),
cirurgia = factor(cirurgia, labels = c("No", "Yes")))
# Selecting, renaming, and excluding missing observations
melanoma <- melanoma %>% dplyr::select(tempo_anos, status, sexo, idade_cat, escolari, EC_cat,
quimio, radio, cirurgia) %>%
rename(time = tempo_anos,
delta = status,
sex = sexo,
age = idade_cat,
education_level = escolari,
clinical_stage = EC_cat,
chemotherapy = quimio,
radiotherapy = radio,
surgery = cirurgia) %>%
na.omit()
melanoma <- melanoma %>% mutate(E1=ifelse(education_level=="Illiterate",1,0),
E2=ifelse(education_level=="Elementary school",1,0),
E3=ifelse(education_level=="High school",1,0),
E4=ifelse(education_level=="Higher education",1,0),
C2=ifelse(clinical_stage=="II",1,0),
C3=ifelse(clinical_stage=="III",1,0),
C4=ifelse(clinical_stage=="IV",1,0),
Age1 = ifelse(age=="> 40",1,0),
Sex1=ifelse(sex=="Female",1,0),
Chemo1=ifelse(chemotherapy=="Yes",1,0),
Radio1=ifelse(radiotherapy=="Yes",1,0),
Surgery1=ifelse(surgery =="Yes",1,0),
)
glimpse(melanoma)
## Rows: 6,751
## Columns: 21
## $ time <dbl> 9.5402299, 0.3092501, 3.3004926, 2.2632731, 5.7060755,~
## $ delta <dbl> 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, ~
## $ sex <fct> Male, Male, Male, Female, Male, Female, Female, Male, ~
## $ age <fct> > 40, > 40, <= 40, > 40, > 40, <= 40, <= 40, <= 40, > ~
## $ education_level <fct> Not informed, Not informed, Elementary school, Element~
## $ clinical_stage <fct> II, II, I, III, I, III, II, I, III, II, IV, III, I, IV~
## $ chemotherapy <fct> No, No, No, Yes, No, Yes, No, No, No, Yes, No, Yes, No~
## $ radiotherapy <fct> No, No, Yes, Yes, No, Yes, No, No, Yes, No, No, No, No~
## $ surgery <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, No~
## $ E1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ E2 <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ~
## $ E3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, ~
## $ E4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, ~
## $ C2 <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ C3 <dbl> 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, ~
## $ C4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, ~
## $ Age1 <dbl> 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Sex1 <dbl> 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, ~
## $ Chemo1 <dbl> 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, ~
## $ Radio1 <dbl> 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ~
## $ Surgery1 <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ~
# Summary
skim(melanoma)
| Name | melanoma |
| Number of rows | 6751 |
| Number of columns | 21 |
| _______________________ | |
| Column type frequency: | |
| factor | 7 |
| numeric | 14 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| sex | 0 | 1 | FALSE | 2 | Fem: 3417, Mal: 3334 |
| age | 0 | 1 | FALSE | 2 | > 4: 5693, <= : 1058 |
| education_level | 0 | 1 | FALSE | 5 | Ele: 2649, Not: 2246, Hig: 841, Hig: 667 |
| clinical_stage | 0 | 1 | FALSE | 4 | I: 3012, II: 1542, III: 1229, IV: 968 |
| chemotherapy | 0 | 1 | FALSE | 2 | No: 5646, Yes: 1105 |
| radiotherapy | 0 | 1 | FALSE | 2 | No: 6164, Yes: 587 |
| surgery | 0 | 1 | FALSE | 2 | Yes: 5980, No: 771 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| time | 0 | 1 | 4.69 | 3.85 | 0 | 1.67 | 3.69 | 6.86 | 18.54 | ▇▅▂▁▁ |
| delta | 0 | 1 | 0.28 | 0.45 | 0 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
| E1 | 0 | 1 | 0.05 | 0.22 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| E2 | 0 | 1 | 0.39 | 0.49 | 0 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
| E3 | 0 | 1 | 0.12 | 0.33 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| E4 | 0 | 1 | 0.10 | 0.30 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| C2 | 0 | 1 | 0.23 | 0.42 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| C3 | 0 | 1 | 0.18 | 0.39 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| C4 | 0 | 1 | 0.14 | 0.35 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| Age1 | 0 | 1 | 0.84 | 0.36 | 0 | 1.00 | 1.00 | 1.00 | 1.00 | ▂▁▁▁▇ |
| Sex1 | 0 | 1 | 0.51 | 0.50 | 0 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
| Chemo1 | 0 | 1 | 0.16 | 0.37 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| Radio1 | 0 | 1 | 0.09 | 0.28 | 0 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| Surgery1 | 0 | 1 | 0.89 | 0.32 | 0 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
# Plots
theme_set(theme_classic())
melanoma %>%
ggplot(aes(x = time)) +
geom_histogram() +
labs(x = "Time", y = "Frequency")
mKM <- survfit(Surv(time, delta) ~ 1, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function")
# Time by sex
melanoma %>%
ggplot(aes(x = sex, y = time)) +
geom_boxplot() +
labs(x = "Sex", y = "Time")
mKM <- survfit(Surv(time, delta) ~ sex, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = c(1, 2))
legend("bottomleft", c("Male", "Female"), col = c(1, 2), lty = 1)
# Time by age
melanoma %>%
ggplot(aes(x = age, y = time)) +
geom_boxplot() +
labs(x = "Age at diagnosis", y = "Time")
mKM <- survfit(Surv(time, delta) ~ age, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = c(1, 2))
legend("bottomleft", c("Less than or equal to 40", "Greater than 40"),
col = c(1, 2), lty = 1)
# Time by education level
melanoma %>%
ggplot(aes(x = education_level, y = time)) +
geom_boxplot() +
labs(x = "Education level", y = "Time")
mKM <- survfit(Surv(time, delta) ~ education_level, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = 1:5)
legend("bottomleft", levels(melanoma$education_level), col = 1:5, lty = 1)
# Time by clinical cancer stage
melanoma %>%
ggplot(aes(x = clinical_stage, y = time)) +
geom_boxplot() +
labs(x = "Clinical cancer stage", y = "Time")
mKM <- survfit(Surv(time, delta) ~ clinical_stage, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = 1:4)
legend("bottomleft", levels(melanoma$clinical_stage), col = 1:4, lty = 1)
# Time by chemotherapy
melanoma %>%
ggplot(aes(x = chemotherapy, y = time)) +
geom_boxplot() +
labs(x = "Was chemotherapy done?", y = "Time")
mKM <- survfit(Surv(time, delta) ~ chemotherapy, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = 1:2)
legend("bottomleft", levels(melanoma$chemotherapy), col = 1:2, lty = 1)
# Time by radiotherapy
melanoma %>%
ggplot(aes(x = chemotherapy, y = time)) +
geom_boxplot() +
labs(x = "Was radiotherapy done?", y = "Time")
mKM <- survfit(Surv(time, delta) ~ radiotherapy, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = 1:2)
legend("bottomleft", levels(melanoma$radiotherapy), col = 1:2, lty = 1)
# Time by surgery
melanoma %>%
ggplot(aes(x = surgery, y = time)) +
geom_boxplot() +
labs(x = "Was surgery done?", y = "Time")
mKM <- survfit(Surv(time, delta) ~ surgery, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time", ylab = "Surviving function", col = 1:2)
legend("bottomleft", levels(melanoma$surgery), col = 1:2, lty = 1)
Consider the following regression structure $$ \[\begin{align*} \log(\theta_i) & = \beta_{11}I(\texttt{sex}_i == ``\textrm{Female}") + \beta_{12}I(\texttt{age}_i == ``\textrm{> 40}") + \\ & \hspace{0.5cm} \beta_{13}I(\texttt{education}_i == ``\textrm{Illiterate}") + \beta_{14}I(\texttt{education}_i == ``\textrm{Elementary school}") + \\ & \hspace{0.5cm} \beta_{15}I(\texttt{education}_i == ``\textrm{High school}") + \beta_{16}I(\texttt{education}_i == ``\textrm{Higher education}"),\\ \log\left(\dfrac{p_i}{1 - p_i}\right) & = \beta_{20} + \beta_{21}I(\texttt{clinical_stage}_i == ``\textrm{II}") + \beta_{22}I(\texttt{clinical_stage}_i == ``\textrm{III}") +\\ & \hspace{0.5cm} \beta_{23}I(\texttt{clinical_stage}_i == ``\textrm{IV}") + \beta_{24}I(\texttt{chemotheray}_i == ``\textrm{Yes}") +\\ & \hspace{0.5cm} \beta_{25}I(\texttt{radiotheray}_i == ``\textrm{Yes}") + \beta_{26}I(\texttt{surgery}_i == ``\textrm{Yes}"),\\ \\ \log(\mu_i) & = \beta_{30} + \beta_{31}I(\texttt{chemotheray}_i == ``\textrm{Yes}") + \beta_{32}I(\texttt{radiotheray}_i == ``\textrm{Yes}") + \\ & \hspace{0.5cm} \beta_{33}I(\texttt{surgery}_i == ``\textrm{Yes}"). \end{align*}\] $$
Consider the test of the following hypotheses
\[ \begin{align*} \textrm{H}_0: \alpha = 0 \quad \textrm{versus} \quad \textrm{H}_a: \alpha \neq 0. \end{align*} \]
We consider the following statistics \[ \Lambda = 2 \{\ell(\mathbf{\widehat{\psi}}) - \ell(\mathbf{\widetilde{\psi}})\}, \] where \(\mathbf{\widehat{\psi}}\) and \(\mathbf{\widetilde{\psi}}\) are the unrestricted and restricted (under the null hypothesis) maximum likelihood estimates of \(\psi = (\mathbf{\beta}_1^\top, \phi, \beta_2^\top, \alpha, \beta_3^\top, \sigma)^\top\).
Under mild regularity conditions, we have that, under \(\textrm{H}_0\), \[ \Lambda \overset{a}{\sim} 0.5 \; \chi^2_1 + 0.5 \; \chi^2_0. \]
## Test statistic
Lambda <- - 2 * (fit0$logLik - fit$logLik)
## Summary
data.frame(Lambda = Lambda,
Critical_value = qchisq(1 - 2 * 0.05, 1))
## Lambda Critical_value
## 1 -0.0270853 2.705543