Load Libraries
Load data into R
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
data<- read_csv("2017_2019_Male_Data.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5206 Columns: 3009
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
## lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#data$age_r
#data$fmarital
#data$agemarr
#data$agemarrn4
#data$agemarrn2
#data$agemarrn3
#data$intact18
### hisagem
Sub-select just variable you will be needing
# Create a new data frame 'data_2' with selected columns
data <- data[, c("age_r", "fmarital", "agemarr", "agemarrn4", "agemarrn2", "agemarrn3", "intact18", "hisagem", "evrmarry", "agemarrn")]
FOCAL VARIABLE: Whether R lived in an intact family birth to age
18
data$intact18 <- recode(data$intact18, "1='yes' ; 2 ='no' ; else=NA")
Data Cleaning
evmarried <-filter(data, fmarital <= 4)
evmarried$ageevent<- pmin(evmarried$hisagem,evmarried$agemarrn, na.rm=T)
nevmarried <-filter(data, fmarital == 5) #yes
nevmarried$ageevent <-nevmarried$age_r
atriskW4<-rbind(evmarried,nevmarried)
atriskW4 <- atriskW4 %>%
filter(ageevent !=98 & !is.na(ageevent))
tabyl(atriskW4$ageevent)
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## ℹ The deprecated feature was likely used in the janitor package.
## Please report the issue at <]8;;https://github.com/sfirke/janitor/issueshttps://github.com/sfirke/janitor/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
atriskW4$censor <- atriskW4$fmarital == 5
Survival Analysis
library(ggsurvfit)
##
## Attaching package: 'ggsurvfit'
## The following object is masked from 'package:psych':
##
## %+%
s1 <- survfit(Surv(ageevent, evrmarry) ~ 1, data = atriskW4)
str(s1)
## List of 16
## $ n : int 5110
## $ time : num [1:35] 15 16 17 18 19 20 21 22 23 24 ...
## $ n.risk : num [1:35] 5110 4924 4732 4498 4227 ...
## $ n.event : num [1:35] 0 6 11 42 70 84 136 121 131 139 ...
## $ n.censor : num [1:35] 186 186 223 229 206 115 128 127 120 128 ...
## $ surv : num [1:35] 1 0.999 0.996 0.987 0.971 ...
## $ std.err : num [1:35] 0 0.000498 0.00086 0.001684 0.002611 ...
## $ cumhaz : num [1:35] 0 0.00122 0.00354 0.01288 0.02944 ...
## $ std.chaz : num [1:35] 0 0.000497 0.000859 0.001678 0.002595 ...
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:35] 1 0.998 0.995 0.984 0.966 ...
## $ upper : num [1:35] 1 1 0.998 0.99 0.976 ...
## $ call : language survfit(formula = Surv(ageevent, evrmarry) ~ 1, data = atriskW4)
## - attr(*, "class")= chr "survfit"
Showing the total survival and number at risk
Plotting the survival probability
s2 <- survfit2(Surv(ageevent, evrmarry) ~ 1, data = atriskW4) %>%
ggsurvfit() +
labs(
x = "Age",
y = "Cumulative survival probability"
) +
add_confidence_interval() +
add_risktable()
s2

Survival Analysis by focal variable(Living with parent till 18)
s3 <- survfit2(Surv(ageevent, evrmarry) ~ intact18, data = atriskW4) %>%
ggsurvfit() +
labs(
x = "Age",
y = "Cumulative survival probability"
) +
add_confidence_interval() +
add_risktable()
s3

library(dplyr)
result <- atriskW4 %>%
group_by(ageevent) %>%
summarise(num_failed = sum(evrmarry, na.rm = TRUE),
num_censored = n())
result