Load Libraries

Load data into R

library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor
data<- read_csv("2017_2019_Male_Data.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 5206 Columns: 3009
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
## lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#data$age_r
#data$fmarital
#data$agemarr

#data$agemarrn4

#data$agemarrn2

#data$agemarrn3
#data$intact18


### hisagem

Sub-select just variable you will be needing

# Create a new data frame 'data_2' with selected columns
data <- data[, c("age_r", "fmarital", "agemarr", "agemarrn4", "agemarrn2", "agemarrn3", "intact18", "hisagem", "evrmarry", "agemarrn")]

FOCAL VARIABLE: Whether R lived in an intact family birth to age 18

data$intact18 <- recode(data$intact18, "1='yes' ; 2 ='no' ; else=NA")

Data Cleaning

evmarried <-filter(data, fmarital <= 4)

evmarried$ageevent<- pmin(evmarried$hisagem,evmarried$agemarrn, na.rm=T)


nevmarried <-filter(data, fmarital == 5) #yes

nevmarried$ageevent <-nevmarried$age_r


atriskW4<-rbind(evmarried,nevmarried)


atriskW4 <- atriskW4 %>% 
  filter(ageevent !=98 & !is.na(ageevent))
tabyl(atriskW4$ageevent)
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## ℹ The deprecated feature was likely used in the janitor package.
##   Please report the issue at <]8;;https://github.com/sfirke/janitor/issueshttps://github.com/sfirke/janitor/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
atriskW4$censor <- atriskW4$fmarital == 5

Survival Analysis

library(ggsurvfit)
## 
## Attaching package: 'ggsurvfit'
## The following object is masked from 'package:psych':
## 
##     %+%
s1 <- survfit(Surv(ageevent, evrmarry) ~ 1, data = atriskW4)
str(s1)
## List of 16
##  $ n        : int 5110
##  $ time     : num [1:35] 15 16 17 18 19 20 21 22 23 24 ...
##  $ n.risk   : num [1:35] 5110 4924 4732 4498 4227 ...
##  $ n.event  : num [1:35] 0 6 11 42 70 84 136 121 131 139 ...
##  $ n.censor : num [1:35] 186 186 223 229 206 115 128 127 120 128 ...
##  $ surv     : num [1:35] 1 0.999 0.996 0.987 0.971 ...
##  $ std.err  : num [1:35] 0 0.000498 0.00086 0.001684 0.002611 ...
##  $ cumhaz   : num [1:35] 0 0.00122 0.00354 0.01288 0.02944 ...
##  $ std.chaz : num [1:35] 0 0.000497 0.000859 0.001678 0.002595 ...
##  $ type     : chr "right"
##  $ logse    : logi TRUE
##  $ conf.int : num 0.95
##  $ conf.type: chr "log"
##  $ lower    : num [1:35] 1 0.998 0.995 0.984 0.966 ...
##  $ upper    : num [1:35] 1 1 0.998 0.99 0.976 ...
##  $ call     : language survfit(formula = Surv(ageevent, evrmarry) ~ 1, data = atriskW4)
##  - attr(*, "class")= chr "survfit"

Showing the total survival and number at risk

Plotting the survival probability

s2 <- survfit2(Surv(ageevent, evrmarry) ~ 1, data = atriskW4) %>% 
  ggsurvfit() +
  labs(
    x = "Age",
    y = "Cumulative survival probability"
    ) + 
  add_confidence_interval() +
  add_risktable()

s2

Survival Analysis by focal variable(Living with parent till 18)

s3 <- survfit2(Surv(ageevent, evrmarry) ~ intact18, data = atriskW4) %>% 
  ggsurvfit() +
  labs(
    x = "Age",
    y = "Cumulative survival probability"
    ) + 
  add_confidence_interval() +
  add_risktable()

s3

library(dplyr)

result <- atriskW4 %>%
  group_by(ageevent) %>%
  summarise(num_failed = sum(evrmarry, na.rm = TRUE),
            num_censored = n())
result