Assignment 2 Adv. Methods of Dem. Analysis

Author

Jacob M. Souch

Import Data

library(readr)
library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ purrr     1.0.1
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggsurvfit)
library(tidycmprsk)

maledata1719 <- read_csv("C:/Users/jacob/Downloads/2017_2019_Male_Data_Final.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)
Rows: 5206 Columns: 3009
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data <- maledata1719 %>% select(caseid, age_r, hisagem,havedeg,fmarital, evrmarry,agemarr,agemarrn,agemarrn2,agemarrn3,agemarrn4, expschl)

Age of Event Variable

data$ageevent <- pmin(data$hisagem,data$agemarrn, na.rm=T)

data$ageevent <- ifelse(is.na(data$ageevent), data$age_r,data$ageevent)


data0 <- data %>% filter(!(ageevent %in%(c(98))))

Censoring

data0 %>% 
  tabyl(ageevent)
 ageevent   n     percent
       15 186 0.035755479
       16 192 0.036908881
       17 234 0.044982699
       18 271 0.052095348
       19 276 0.053056517
       20 199 0.038254517
       21 264 0.050749712
       22 249 0.047866205
       23 251 0.048250673
       24 268 0.051518647
       25 285 0.054786621
       26 243 0.046712803
       27 234 0.044982699
       28 241 0.046328335
       29 209 0.040176855
       30 201 0.038638985
       31 167 0.032103037
       32 167 0.032103037
       33 150 0.028835063
       34 111 0.021337947
       35  98 0.018838908
       36  87 0.016724337
       37  75 0.014417532
       38  65 0.012495194
       39  62 0.011918493
       40  61 0.011726259
       41  48 0.009227220
       42  57 0.010957324
       43  33 0.006343714
       44  34 0.006535948
       45  34 0.006535948
       46  39 0.007497116
       47  40 0.007689350
       48  37 0.007112649
       49  34 0.006535948
data0$censor <- data0$fmarital == 5

Survival Fit 1

s1 <- survfit(Surv(ageevent, evrmarry) ~ 1, data = data0)
str(s1)
List of 16
 $ n        : int 5202
 $ time     : num [1:35] 15 16 17 18 19 20 21 22 23 24 ...
 $ n.risk   : num [1:35] 5202 5016 4824 4590 4319 ...
 $ n.event  : num [1:35] 0 6 11 42 70 84 136 122 131 140 ...
 $ n.censor : num [1:35] 186 186 223 229 206 115 128 127 120 128 ...
 $ surv     : num [1:35] 1 0.999 0.997 0.987 0.971 ...
 $ std.err  : num [1:35] 0 0.000489 0.000844 0.001651 0.002557 ...
 $ cumhaz   : num [1:35] 0 0.0012 0.00348 0.01263 0.02883 ...
 $ std.chaz : num [1:35] 0 0.000488 0.000843 0.001645 0.002541 ...
 $ type     : chr "right"
 $ logse    : logi TRUE
 $ conf.int : num 0.95
 $ conf.type: chr "log"
 $ lower    : num [1:35] 1 0.998 0.995 0.984 0.967 ...
 $ upper    : num [1:35] 1 1 0.998 0.991 0.976 ...
 $ call     : language survfit(formula = Surv(ageevent, evrmarry) ~ 1, data = data0)
 - attr(*, "class")= chr "survfit"
s1
Call: survfit(formula = Surv(ageevent, evrmarry) ~ 1, data = data0)

        n events median 0.95LCL 0.95UCL
[1,] 5202   1957     33      32      34

Survival Fit 2

s2 <- survfit2(Surv(ageevent, evrmarry) ~ 1, data = data0) %>% 
  ggsurvfit() +
  labs(
    x = "Age",
    y = "Cumulative survival probability"
    ) + 
  add_confidence_interval() +
  add_risktable()

s2

Survival Fit by Degree

data0$Degree <- car::recode(data0$havedeg, "1='Yes'; 5='No'")

s3 <- survfit2(Surv(ageevent, evrmarry) ~ Degree, data = data0) %>% 
  ggsurvfit() +
  labs(
    x = "Age",
    y = "Cumulative survival probability"
    ) + 
  add_confidence_interval() +
  add_risktable()

s3