library(purrr)
library(haven)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plyr)
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
library(sur)
library(summarytools)
library(Rmisc)
## Loading required package: lattice
library(car)
## Loading required package: carData
##
## Attaching package: 'carData'
## The following objects are masked from 'package:sur':
##
## Anscombe, States
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(usmap)
library(ggthemes)
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ readr 2.1.4 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ plyr::compact() masks purrr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::desc() masks plyr::desc()
## ✖ scales::discard() masks purrr::discard()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ car::recode() masks dplyr::recode()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ car::some() masks purrr::some()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ✖ tibble::view() masks summarytools::view()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidycensus)
library("ggpubr")
##
## Attaching package: 'ggpubr'
##
## The following object is masked from 'package:plyr':
##
## mutate
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library("writexl")
library(readxl)
library("tinytex")
library("lubridate")
library(splitstackshape)
library(tinytex)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following object is masked from 'package:sur':
##
## skew
##
## The following objects are masked from 'package:scales':
##
## alpha, rescale
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(glmmTMB)
library(readxl)
library(openxlsx)
library(readr)
X2017_2019_Male_Data_2 <-read_csv("C:/Users/bryan/OneDrive/Desktop/2017_2019_Male_Data_2.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5206 Columns: 3009
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
## lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Subset data
shortdata <- X2017_2019_Male_Data_2[, c("age_r", "hisp", "fmarit", "hisagem", "agemarrn")]
#censored in life table, interviwed but not married #create variables for marriage status
dat <- shortdata
# Define ageevent for evmarried based on your criteria (replace with the appropriate variable)
evmarried <- filter(dat, fmarit %in% c(1, 2, 3, 4))
evmarried$ageevent <- evmarried$agemarrn # Replace with the correct variable
# Define ageevent for nevmarried based on your criteria (replace with the appropriate variable)
nevmarried <- filter(dat, fmarit == 5)
nevmarried$ageevent <- nevmarried$age_r # Replace with the correct variable
# Combine data
atriskW4 <- rbind(evmarried, nevmarried)
# Now you can create your tables
tabyl(evmarried$ageevent)
## evmarried$ageevent n percent valid_percent
## 16 3 0.0015298317 0.004862237
## 17 10 0.0050994391 0.016207455
## 18 28 0.0142784294 0.045380875
## 19 42 0.0214176441 0.068071313
## 20 49 0.0249872514 0.079416532
## 21 80 0.0407955125 0.129659643
## 22 46 0.0234574197 0.074554295
## 23 48 0.0244773075 0.077795786
## 24 58 0.0295767466 0.094003241
## 25 51 0.0260071392 0.082658023
## 26 25 0.0127485977 0.040518639
## 27 31 0.0158082611 0.050243112
## 28 25 0.0127485977 0.040518639
## 29 30 0.0152983172 0.048622366
## 30 13 0.0066292708 0.021069692
## 31 15 0.0076491586 0.024311183
## 32 9 0.0045894952 0.014586710
## 33 11 0.0056093830 0.017828201
## 34 9 0.0045894952 0.014586710
## 35 6 0.0030596634 0.009724473
## 36 6 0.0030596634 0.009724473
## 37 6 0.0030596634 0.009724473
## 38 5 0.0025497195 0.008103728
## 40 3 0.0015298317 0.004862237
## 41 2 0.0010198878 0.003241491
## 42 1 0.0005099439 0.001620746
## 43 1 0.0005099439 0.001620746
## 45 1 0.0005099439 0.001620746
## 98 3 0.0015298317 0.004862237
## NA 1344 0.6853646099 NA
tabyl(nevmarried$ageevent)
## nevmarried$ageevent n percent
## 15 186 0.057425131
## 16 186 0.057425131
## 17 223 0.068848410
## 18 229 0.070700834
## 19 206 0.063599877
## 20 115 0.035504785
## 21 128 0.039518370
## 22 127 0.039209633
## 23 120 0.037048472
## 24 128 0.039518370
## 25 121 0.037357209
## 26 104 0.032108676
## 27 117 0.036122260
## 28 129 0.039827107
## 29 89 0.027477617
## 30 109 0.033652362
## 31 94 0.029021303
## 32 99 0.030564989
## 33 89 0.027477617
## 34 61 0.018832973
## 35 62 0.019141710
## 36 51 0.015745600
## 37 46 0.014201914
## 38 49 0.015128126
## 39 50 0.015436863
## 40 41 0.012658228
## 41 35 0.010805804
## 42 42 0.012966965
## 43 27 0.008335906
## 44 27 0.008335906
## 45 26 0.008027169
## 46 32 0.009879592
## 47 33 0.010188330
## 48 28 0.008644643
## 49 30 0.009262118
atriskW4 <- shortdata
#evmarW4
atriskW4<-rbind(evmarried,nevmarried)
tabyl(evmarried$ageevent)
## evmarried$ageevent n percent valid_percent
## 16 3 0.0015298317 0.004862237
## 17 10 0.0050994391 0.016207455
## 18 28 0.0142784294 0.045380875
## 19 42 0.0214176441 0.068071313
## 20 49 0.0249872514 0.079416532
## 21 80 0.0407955125 0.129659643
## 22 46 0.0234574197 0.074554295
## 23 48 0.0244773075 0.077795786
## 24 58 0.0295767466 0.094003241
## 25 51 0.0260071392 0.082658023
## 26 25 0.0127485977 0.040518639
## 27 31 0.0158082611 0.050243112
## 28 25 0.0127485977 0.040518639
## 29 30 0.0152983172 0.048622366
## 30 13 0.0066292708 0.021069692
## 31 15 0.0076491586 0.024311183
## 32 9 0.0045894952 0.014586710
## 33 11 0.0056093830 0.017828201
## 34 9 0.0045894952 0.014586710
## 35 6 0.0030596634 0.009724473
## 36 6 0.0030596634 0.009724473
## 37 6 0.0030596634 0.009724473
## 38 5 0.0025497195 0.008103728
## 40 3 0.0015298317 0.004862237
## 41 2 0.0010198878 0.003241491
## 42 1 0.0005099439 0.001620746
## 43 1 0.0005099439 0.001620746
## 45 1 0.0005099439 0.001620746
## 98 3 0.0015298317 0.004862237
## NA 1344 0.6853646099 NA
#this is number of censored for each age, match and use formulas for life tble
tabyl(nevmarried$ageevent)
## nevmarried$ageevent n percent
## 15 186 0.057425131
## 16 186 0.057425131
## 17 223 0.068848410
## 18 229 0.070700834
## 19 206 0.063599877
## 20 115 0.035504785
## 21 128 0.039518370
## 22 127 0.039209633
## 23 120 0.037048472
## 24 128 0.039518370
## 25 121 0.037357209
## 26 104 0.032108676
## 27 117 0.036122260
## 28 129 0.039827107
## 29 89 0.027477617
## 30 109 0.033652362
## 31 94 0.029021303
## 32 99 0.030564989
## 33 89 0.027477617
## 34 61 0.018832973
## 35 62 0.019141710
## 36 51 0.015745600
## 37 46 0.014201914
## 38 49 0.015128126
## 39 50 0.015436863
## 40 41 0.012658228
## 41 35 0.010805804
## 42 42 0.012966965
## 43 27 0.008335906
## 44 27 0.008335906
## 45 26 0.008027169
## 46 32 0.009879592
## 47 33 0.010188330
## 48 28 0.008644643
## 49 30 0.009262118
atriskW4$censor<- recode(atriskW4$fmarit, "5=1; else=0")
atriskW4$married<- recode(atriskW4$fmarit, "1:4=1; else = 0")
table(atriskW4$ageevent,atriskW4$fmarit)
##
## 1 2 3 4 5
## 15 0 0 0 0 186
## 16 2 0 1 0 186
## 17 5 0 4 1 223
## 18 13 3 11 1 229
## 19 18 0 17 7 206
## 20 24 0 20 5 115
## 21 37 2 37 4 128
## 22 15 0 27 4 127
## 23 19 1 23 5 120
## 24 28 2 20 8 128
## 25 9 1 30 11 121
## 26 6 2 17 0 104
## 27 10 0 14 7 117
## 28 2 0 17 6 129
## 29 10 1 13 6 89
## 30 2 0 8 3 109
## 31 3 0 9 3 94
## 32 2 0 6 1 99
## 33 2 1 5 3 89
## 34 2 1 4 2 61
## 35 0 0 6 0 62
## 36 1 0 5 0 51
## 37 1 0 4 1 46
## 38 0 0 3 2 49
## 39 0 0 0 0 50
## 40 1 0 1 1 41
## 41 0 1 1 0 35
## 42 0 0 1 0 42
## 43 0 0 1 0 27
## 44 0 0 0 0 27
## 45 0 0 1 0 26
## 46 0 0 0 0 32
## 47 0 0 0 0 33
## 48 0 0 0 0 28
## 49 0 0 0 0 30
## 98 1 0 1 1 0
atriskW4$censor<- recode(atriskW4$fmarit, "5=1; else=0")
table(atriskW4$ageevent,atriskW4$fmarit)
##
## 1 2 3 4 5
## 15 0 0 0 0 186
## 16 2 0 1 0 186
## 17 5 0 4 1 223
## 18 13 3 11 1 229
## 19 18 0 17 7 206
## 20 24 0 20 5 115
## 21 37 2 37 4 128
## 22 15 0 27 4 127
## 23 19 1 23 5 120
## 24 28 2 20 8 128
## 25 9 1 30 11 121
## 26 6 2 17 0 104
## 27 10 0 14 7 117
## 28 2 0 17 6 129
## 29 10 1 13 6 89
## 30 2 0 8 3 109
## 31 3 0 9 3 94
## 32 2 0 6 1 99
## 33 2 1 5 3 89
## 34 2 1 4 2 61
## 35 0 0 6 0 62
## 36 1 0 5 0 51
## 37 1 0 4 1 46
## 38 0 0 3 2 49
## 39 0 0 0 0 50
## 40 1 0 1 1 41
## 41 0 1 1 0 35
## 42 0 0 1 0 42
## 43 0 0 1 0 27
## 44 0 0 0 0 27
## 45 0 0 1 0 26
## 46 0 0 0 0 32
## 47 0 0 0 0 33
## 48 0 0 0 0 28
## 49 0 0 0 0 30
## 98 1 0 1 1 0
# Recode variables
atriskW4$censor <- recode(atriskW4$fmarit, "5=1; else=0")
atriskW4$married <- recode(atriskW4$fmarit, "1:4=1; else=0")
# Create tables
table(atriskW4$ageevent, atriskW4$fmarit)
##
## 1 2 3 4 5
## 15 0 0 0 0 186
## 16 2 0 1 0 186
## 17 5 0 4 1 223
## 18 13 3 11 1 229
## 19 18 0 17 7 206
## 20 24 0 20 5 115
## 21 37 2 37 4 128
## 22 15 0 27 4 127
## 23 19 1 23 5 120
## 24 28 2 20 8 128
## 25 9 1 30 11 121
## 26 6 2 17 0 104
## 27 10 0 14 7 117
## 28 2 0 17 6 129
## 29 10 1 13 6 89
## 30 2 0 8 3 109
## 31 3 0 9 3 94
## 32 2 0 6 1 99
## 33 2 1 5 3 89
## 34 2 1 4 2 61
## 35 0 0 6 0 62
## 36 1 0 5 0 51
## 37 1 0 4 1 46
## 38 0 0 3 2 49
## 39 0 0 0 0 50
## 40 1 0 1 1 41
## 41 0 1 1 0 35
## 42 0 0 1 0 42
## 43 0 0 1 0 27
## 44 0 0 0 0 27
## 45 0 0 1 0 26
## 46 0 0 0 0 32
## 47 0 0 0 0 33
## 48 0 0 0 0 28
## 49 0 0 0 0 30
## 98 1 0 1 1 0
# Load survival libraries
library(tidycmprsk)
library(survival)
# Fit survival model
s1 <- survfit(Surv(fmarit) ~ 1, data = atriskW4)
str(s1)
## List of 16
## $ n : int 5200
## $ time : num [1:5] 1 2 3 4 5
## $ n.risk : num [1:5] 5200 3735 3716 3368 3239
## $ n.event : num [1:5] 1465 19 348 129 3239
## $ n.censor : num [1:5] 0 0 0 0 0
## $ surv : num [1:5] 0.718 0.715 0.648 0.623 0
## $ std.err : num [1:5] 0.00869 0.00876 0.01023 0.01079 Inf
## $ cumhaz : num [1:5] 0.282 0.287 0.38 0.419 1.419
## $ std.chaz : num [1:5] 0.00736 0.00745 0.00899 0.0096 0.02002
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:5] 0.706 0.702 0.635 0.61 NA
## $ upper : num [1:5] 0.731 0.727 0.661 0.636 NA
## $ call : language survfit(formula = Surv(fmarit) ~ 1, data = atriskW4)
## - attr(*, "class")= chr "survfit"
library(dplyr)
# Filter individuals who had been married (fmarit == 1, 2, 3, or 4)
married_data <- shortdata %>%
filter(fmarit %in% c(1, 2, 3, 4))
# Filter individuals who had not been married (fmarit == 5)
not_married_data <- shortdata %>%
filter(fmarit == 5)
# Create a variable for age at first marriage.
dat <- shortdata %>%
mutate(agemarry1 = pmin(hisagem, agemarrn, na.rm=TRUE))
head(dat)
## # A tibble: 6 × 6
## age_r hisp fmarit hisagem agemarrn agemarry1
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 31 5 5 NA NA NA
## 2 17 5 5 NA NA NA
## 3 16 1 5 NA NA NA
## 4 49 5 3 NA 18 18
## 5 39 5 5 NA NA NA
## 6 37 5 1 27 NA 27
nrow(dat)
## [1] 5206
dat<-as.data.frame(dat)
tabyl(dat$agemarry1)
## dat$agemarry1 n percent valid_percent
## 16 6 0.0011525163 0.003210273
## 17 11 0.0021129466 0.005885500
## 18 42 0.0080676143 0.022471910
## 19 70 0.0134460238 0.037453184
## 20 84 0.0161352286 0.044943820
## 21 136 0.0261237034 0.072766185
## 22 121 0.0232424126 0.064740503
## 23 131 0.0251632731 0.070090958
## 24 139 0.0266999616 0.074371322
## 25 163 0.0313100269 0.087212413
## 26 136 0.0261237034 0.072766185
## 27 117 0.0224740684 0.062600321
## 28 109 0.0209373799 0.058319957
## 29 117 0.0224740684 0.062600321
## 30 88 0.0169035728 0.047084002
## 31 70 0.0134460238 0.037453184
## 32 63 0.0121014214 0.033707865
## 33 57 0.0109489051 0.030497592
## 34 47 0.0090280446 0.025147138
## 35 29 0.0055704956 0.015516319
## 36 31 0.0059546677 0.016586410
## 37 27 0.0051863235 0.014446228
## 38 15 0.0028812908 0.008025682
## 39 7 0.0013446024 0.003745318
## 40 16 0.0030733769 0.008560728
## 41 9 0.0017287745 0.004815409
## 42 8 0.0015366884 0.004280364
## 43 5 0.0009604303 0.002675227
## 44 4 0.0007683442 0.002140182
## 45 4 0.0007683442 0.002140182
## 46 3 0.0005762582 0.001605136
## 98 4 0.0007683442 0.002140182
## NA 3337 0.6409911640 NA
#number of each of marriage, the kinds of fmarit variable
table(dat$agemarry1, dat$fmarit)
##
## 0 1 2 3 4 5
## 16 0 5 0 1 0 0
## 17 0 6 0 4 1 0
## 18 0 27 3 11 1 0
## 19 0 46 0 17 7 0
## 20 0 59 0 20 5 0
## 21 0 93 2 37 4 0
## 22 0 90 0 27 4 0
## 23 0 102 1 23 5 0
## 24 0 109 2 20 8 0
## 25 0 121 1 30 11 0
## 26 0 117 2 17 0 0
## 27 0 96 0 14 7 0
## 28 0 86 0 17 6 0
## 29 0 97 1 13 6 0
## 30 0 77 0 8 3 0
## 31 0 58 0 9 3 0
## 32 0 56 0 6 1 0
## 33 0 48 1 5 3 0
## 34 0 40 1 4 2 0
## 35 0 23 0 6 0 0
## 36 0 26 0 5 0 0
## 37 0 22 0 4 1 0
## 38 0 10 0 3 2 0
## 39 0 7 0 0 0 0
## 40 0 14 0 1 1 0
## 41 0 7 1 1 0 0
## 42 0 7 0 1 0 0
## 43 0 4 0 1 0 0
## 44 0 4 0 0 0 0
## 45 0 3 0 1 0 0
## 46 0 3 0 0 0 0
## 98 0 2 0 1 1 0
length(atriskW4)
## [1] 8
library(tidycmprsk)
library(survival)
s1 <- survfit(Surv(fmarit) ~ 1, data = atriskW4)
str(s1)
## List of 16
## $ n : int 5200
## $ time : num [1:5] 1 2 3 4 5
## $ n.risk : num [1:5] 5200 3735 3716 3368 3239
## $ n.event : num [1:5] 1465 19 348 129 3239
## $ n.censor : num [1:5] 0 0 0 0 0
## $ surv : num [1:5] 0.718 0.715 0.648 0.623 0
## $ std.err : num [1:5] 0.00869 0.00876 0.01023 0.01079 Inf
## $ cumhaz : num [1:5] 0.282 0.287 0.38 0.419 1.419
## $ std.chaz : num [1:5] 0.00736 0.00745 0.00899 0.0096 0.02002
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:5] 0.706 0.702 0.635 0.61 NA
## $ upper : num [1:5] 0.731 0.727 0.661 0.636 NA
## $ call : language survfit(formula = Surv(fmarit) ~ 1, data = atriskW4)
## - attr(*, "class")= chr "survfit"
library(ggsurvfit)
##
## Attaching package: 'ggsurvfit'
## The following object is masked from 'package:psych':
##
## %+%
survfit2(Surv(ageevent) ~ 1, data = atriskW4) %>%
ggsurvfit() +
labs(
x = "Age",
y = "Cumulative survival probability"
) +
add_confidence_interval() +
add_risktable()
summary(survfit(Surv(ageevent) ~ 1, data = atriskW4), times = 25)
## Call: survfit(formula = Surv(ageevent) ~ 1, data = atriskW4)
##
## 1344 observations deleted due to missingness
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 25 1844 2184 0.434 0.00798 0.418 0.45
phtest <- survfit(Surv(hisagem, fmarit) ~ hisp, data = atriskW4)
## Warning in Surv(hisagem, fmarit): Invalid status value, converted to NA
#https://rdrr.io/cran/survival/man/plot.survfit.html
# now running some other plots
plot(phtest, fun = "S", xlab = "age",
ylab = "cumulative survival", main = "cumulative survival curves by hispanic")
plot(phtest, fun = "cumhaz", xlab = "age",
ylab = "cumulative hazard", main = "cumulative hazard curves by hispanic")
plot(phtest, fun = "cloglog", xlab = "age",
ylab = "log-log survival", main = "log-log curves by hispanic")
ageprobs <- atriskW4 %>%
group_by(age_r, hisp, fmarit, hisagem, ageevent, married) %>%
summarize(q = mean(married, na.rm = TRUE), n = n(), .groups = "drop")
#ask about it!
ageprobs$numq <- ageprobs$q*(1-ageprobs$q)
ageprobs$seq <- sqrt(ageprobs$numq/ageprobs$n)
ageprobs$meq <- 1.96*ageprobs$seq
ageprobs$logq <- log(ageprobs$q)
ageprobs$numq <- ageprobs$q*(1-ageprobs$q)
ageprobs$seq <- sqrt(ageprobs$numq/ageprobs$n)
ageprobs$meq <- 1.96*ageprobs$seq
ageprobs$logq <- log(ageprobs$q)
ggplot(data =ageprobs, aes(x = ageevent, y = q, ymin=q-meq, ymax=q+meq, fill=hisp,color =hisp, group = hisp)) +
geom_line() + ylim(0,.15) + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="First Marriage Rates by Age",
subtitle="US-Born Participants",
caption="Source: NSYR Participants w/ W3 and W4 Data") +
xlab(label="Age at Risk") +
ylab(label="First Marriage Rate")
## Warning: Removed 660 rows containing missing values (`geom_line()`).
#ask about it!
ggplot(data =ageprobs, aes(x = ageevent, y = logq, group = hisp)) +
geom_line() +
labs(title="First Marriage Rates (Logged) by Age",
subtitle="US-Born Participants",
caption="Source: NSYR Participants w/ W3 and W4 Data") +
xlab(label="Age at Risk") +
ylab(label="First Marriage Rate (Logged")
## Warning: Removed 657 rows containing missing values (`geom_line()`).