#read in data
library(readr)
mdata <- read_csv("C:/Users/rlutt/OneDrive/Fall 2023/Advanced Demography/2017_2019_Male_Data.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 5206 Columns: 3009
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1939): caseid, rscrninf, rscrage, rscrhisp, rscrrace, age_a, age_r, age...
## lgl (1070): sedbclc3, sedbclc4, sedwhlc3, sedwhlc4, sedconlc4, p2currwife, p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Homework Instructions
Produce a life table in Excel for age at first marriage based on a crosstab of age of event and whether ever married. • Add a focal variable with two categories. • Run the same analyses in R with the exception of the left truncation stuff. • Note: Begin working on your timing and outcome variable immediatel
#check distribution of data
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#age at interview
tabyl(mdata$ager)
## mdata$ager n percent
## 15 186 0.0357280061
## 16 186 0.0357280061
## 17 223 0.0428351902
## 18 230 0.0441797925
## 19 207 0.0397618133
## 20 117 0.0224740684
## 21 131 0.0251632731
## 22 139 0.0266999616
## 23 133 0.0255474453
## 24 146 0.0280445640
## 25 140 0.0268920476
## 26 135 0.0259316174
## 27 156 0.0299654245
## 28 193 0.0370726085
## 29 145 0.0278524779
## 30 162 0.0311179408
## 31 163 0.0313100269
## 32 199 0.0382251249
## 33 190 0.0364963504
## 34 135 0.0259316174
## 35 162 0.0311179408
## 36 143 0.0274683058
## 37 127 0.0243949289
## 38 125 0.0240107568
## 39 153 0.0293891663
## 40 127 0.0243949289
## 41 125 0.0240107568
## 42 138 0.0265078755
## 43 99 0.0190165194
## 44 105 0.0201690357
## 45 94 0.0180560891
## 46 116 0.0222819823
## 47 136 0.0261237034
## 48 117 0.0224740684
## 49 120 0.0230503265
## 50 3 0.0005762582
#marital status
tabyl(mdata$fmarital)
## mdata$fmarital n percent
## 1 1465 0.281406070
## 2 19 0.003649635
## 3 348 0.066845947
## 4 129 0.024779101
## 5 3245 0.623319247
#age at current marriage
tabyl(mdata$hisagem)
## mdata$hisagem n percent valid_percent
## 16 3 0.0005762582 0.0020477816
## 17 1 0.0001920861 0.0006825939
## 18 14 0.0026892048 0.0095563140
## 19 28 0.0053784095 0.0191126280
## 20 35 0.0067230119 0.0238907850
## 21 57 0.0109489051 0.0389078498
## 22 76 0.0145985401 0.0518771331
## 23 88 0.0169035728 0.0600682594
## 24 83 0.0159431425 0.0566552901
## 25 113 0.0217057242 0.0771331058
## 26 120 0.0230503265 0.0819112628
## 27 103 0.0197848636 0.0703071672
## 28 92 0.0176719170 0.0627986348
## 29 100 0.0192086055 0.0682593857
## 30 82 0.0157510565 0.0559726962
## 31 68 0.0130618517 0.0464163823
## 32 65 0.0124855935 0.0443686007
## 33 63 0.0121014214 0.0430034130
## 34 54 0.0103726469 0.0368600683
## 35 33 0.0063388398 0.0225255973
## 36 40 0.0076834422 0.0273037543
## 37 28 0.0053784095 0.0191126280
## 38 21 0.0040338071 0.0143344710
## 39 16 0.0030733769 0.0109215017
## 40 20 0.0038417211 0.0136518771
## 41 14 0.0026892048 0.0095563140
## 42 14 0.0026892048 0.0095563140
## 43 8 0.0015366884 0.0054607509
## 44 10 0.0019208605 0.0068259386
## 45 6 0.0011525163 0.0040955631
## 46 6 0.0011525163 0.0040955631
## 47 2 0.0003841721 0.0013651877
## 98 2 0.0003841721 0.0013651877
## NA 3741 0.7185939301 NA
#age at first marriage
tabyl(mdata$agemarrn)
## mdata$agemarrn n percent valid_percent
## 16 3 0.0005762582 0.004862237
## 17 10 0.0019208605 0.016207455
## 18 28 0.0053784095 0.045380875
## 19 42 0.0080676143 0.068071313
## 20 49 0.0094122167 0.079416532
## 21 80 0.0153668844 0.129659643
## 22 46 0.0088359585 0.074554295
## 23 48 0.0092201306 0.077795786
## 24 58 0.0111409912 0.094003241
## 25 51 0.0097963888 0.082658023
## 26 25 0.0048021514 0.040518639
## 27 31 0.0059546677 0.050243112
## 28 25 0.0048021514 0.040518639
## 29 30 0.0057625816 0.048622366
## 30 13 0.0024971187 0.021069692
## 31 15 0.0028812908 0.024311183
## 32 9 0.0017287745 0.014586710
## 33 11 0.0021129466 0.017828201
## 34 9 0.0017287745 0.014586710
## 35 6 0.0011525163 0.009724473
## 36 6 0.0011525163 0.009724473
## 37 6 0.0011525163 0.009724473
## 38 5 0.0009604303 0.008103728
## 40 3 0.0005762582 0.004862237
## 41 2 0.0003841721 0.003241491
## 42 1 0.0001920861 0.001620746
## 43 1 0.0001920861 0.001620746
## 45 1 0.0001920861 0.001620746
## 98 3 0.0005762582 0.004862237
## NA 4589 0.8814829043 NA
#age at second marriage
tabyl(mdata$agemarrn2)
## mdata$agemarrn2 n percent valid_percent
## 20 2 0.0003841721 0.02439024
## 21 2 0.0003841721 0.02439024
## 22 3 0.0005762582 0.03658537
## 23 5 0.0009604303 0.06097561
## 24 3 0.0005762582 0.03658537
## 25 7 0.0013446024 0.08536585
## 26 7 0.0013446024 0.08536585
## 27 6 0.0011525163 0.07317073
## 28 7 0.0013446024 0.08536585
## 29 6 0.0011525163 0.07317073
## 30 3 0.0005762582 0.03658537
## 31 4 0.0007683442 0.04878049
## 32 4 0.0007683442 0.04878049
## 33 3 0.0005762582 0.03658537
## 34 1 0.0001920861 0.01219512
## 35 2 0.0003841721 0.02439024
## 36 4 0.0007683442 0.04878049
## 38 4 0.0007683442 0.04878049
## 39 2 0.0003841721 0.02439024
## 40 1 0.0001920861 0.01219512
## 41 1 0.0001920861 0.01219512
## 43 1 0.0001920861 0.01219512
## 44 1 0.0001920861 0.01219512
## 98 1 0.0001920861 0.01219512
## 99 2 0.0003841721 0.02439024
## NA 5124 0.9842489435 NA
#age at third marriage
tabyl(mdata$agemarrn3)
## mdata$agemarrn3 n percent valid_percent
## 25 2 0.0003841721 0.15384615
## 28 1 0.0001920861 0.07692308
## 29 2 0.0003841721 0.15384615
## 30 2 0.0003841721 0.15384615
## 32 1 0.0001920861 0.07692308
## 33 1 0.0001920861 0.07692308
## 36 1 0.0001920861 0.07692308
## 37 1 0.0001920861 0.07692308
## 41 1 0.0001920861 0.07692308
## 99 1 0.0001920861 0.07692308
## NA 5193 0.9975028813 NA
#age at fourth marriage
tabyl(mdata$agemarrn4)
## mdata$agemarrn4 n percent valid_percent
## NA 5206 1 NA
# set up risk set
library(car)
## Loading required package: carData
mdata$status <- Recode(mdata$fmarital,
recodes= "1 = 1; 5 = 0; 2 = 2; 3=2; 4=2")
mdata$evermarried<-Recode(mdata$fmarital,"1:4 = 1; 5= 0")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##here I use nformwife and numwife as a way to point out if the respondent has been married more than once
##I make 4 groups: those who are married and this in their first marriage, those who are married and this is a previous marriage than their first, those who are divorce/separated or widowed, and the never married
marriedsetnfw <- filter(mdata, mdata$status==1 & numwife==1)
marriedsetfw<-filter(mdata, status==1 & nformwife>0)
nmarriedset <- filter(mdata, status ==0)
marriedinpast<-filter(mdata, status==2 & nformwife>0)
marriedsetnfw$ageevent <-marriedsetnfw$hisagem
marriedsetfw$ageevent<-marriedsetfw$agemarrn
nmarriedset$ageevent <- nmarriedset$ager
marriedinpast$ageevent<-marriedinpast$agemarrn
#here I point out those who have been married before or are currently married as compared to those that have never been married
hasbeenmarried<-rbind(marriedsetnfw, marriedinpast)
tabyl(hasbeenmarried$ageevent)
## hasbeenmarried$ageevent n percent valid_percent
## 16 3 0.001726122 0.001822600
## 17 6 0.003452244 0.003645200
## 18 29 0.016685846 0.017618469
## 19 52 0.029919448 0.031591738
## 20 60 0.034522440 0.036452005
## 21 99 0.056962025 0.060145808
## 22 106 0.060989643 0.064398542
## 23 112 0.064441887 0.068043742
## 24 111 0.063866513 0.067436209
## 25 153 0.088032221 0.092952612
## 26 129 0.074223245 0.078371810
## 27 106 0.060989643 0.064398542
## 28 106 0.060989643 0.064398542
## 29 107 0.061565017 0.065006075
## 30 86 0.049482163 0.052247874
## 31 65 0.037399310 0.039489672
## 32 59 0.033947066 0.035844471
## 33 55 0.031645570 0.033414338
## 34 44 0.025316456 0.026731470
## 35 29 0.016685846 0.017618469
## 36 30 0.017261220 0.018226002
## 37 26 0.014959724 0.015795869
## 38 15 0.008630610 0.009113001
## 39 7 0.004027618 0.004252734
## 40 15 0.008630610 0.009113001
## 41 9 0.005178366 0.005467801
## 42 8 0.004602992 0.004860267
## 43 4 0.002301496 0.002430134
## 44 4 0.002301496 0.002430134
## 45 4 0.002301496 0.002430134
## 46 3 0.001726122 0.001822600
## 98 4 0.002301496 0.002430134
## NA 92 0.052934407 NA
#after making the age of event variable the same (Ageevent) for all the groups I bind them together
atrisk <- rbind(marriedsetnfw,nmarriedset, marriedsetfw, marriedinpast)
atrisk<- atrisk%>%
filter(ageevent!="NA")%>%
filter(ageevent!=98)
tabyl(atrisk$ageevent)
## atrisk$ageevent n percent
## 15 186 0.036477741
## 16 191 0.037458325
## 17 234 0.045891351
## 18 271 0.053147676
## 19 276 0.054128260
## 20 199 0.039027260
## 21 264 0.051774858
## 22 248 0.048636988
## 23 251 0.049225338
## 24 267 0.052363208
## 25 283 0.055501079
## 26 239 0.046871936
## 27 233 0.045695234
## 28 238 0.046675819
## 29 206 0.040400078
## 30 197 0.038635026
## 31 163 0.031967052
## 32 160 0.031378702
## 33 146 0.028633065
## 34 107 0.020984507
## 35 92 0.018042753
## 36 82 0.016081585
## 37 73 0.014316533
## 38 64 0.012551481
## 39 57 0.011178662
## 40 58 0.011374779
## 41 44 0.008629143
## 42 50 0.009805844
## 43 31 0.006079623
## 44 31 0.006079623
## 45 30 0.005883507
## 46 35 0.006864091
## 47 33 0.006471857
## 48 29 0.005687390
## 49 31 0.006079623
#create crosstab of age of event and whether ever married
table(atrisk$agemarrn,atrisk$ager)
##
## 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0
## 17 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 1
## 18 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 4 0 0 1 3
## 19 0 0 0 0 0 0 0 0 1 1 1 2 1 0 1 1 1 2 2 0 2 1 1 0 1
## 20 0 0 0 0 0 0 0 0 0 1 0 0 1 0 2 1 2 3 1 2 0 4 1 2 4
## 21 0 0 0 0 0 0 0 0 0 0 1 2 0 2 0 3 1 6 3 1 8 4 1 3 3
## 22 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 4 5 1 2 2 2 1 1 2
## 23 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 0 3 2 5 2 1 1 1 3 1
## 24 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 3 1 2 2 3 2 3 4
## 25 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 2 1 1 5 3 0 3 1 3
## 26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 1 0 0 0 0 0 1 0
## 27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 1 0 2 1 4 2
## 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 3 1 2 1 2
## 29 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 4 0 0 2
## 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 0 0 1 0 0 2
## 31 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 2
## 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 1 0 0 0
## 33 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 3
## 34 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## 35 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 36 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
## 37 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 38 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 40 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 41 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 42 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 43 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 45 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
##
## 40 41 42 43 44 45 46 47 48 49 50
## 16 0 0 0 0 0 0 0 1 0 0 0
## 17 0 0 1 0 0 2 0 2 0 0 0
## 18 3 2 2 1 2 0 2 1 1 3 1
## 19 0 1 2 2 2 3 2 5 2 5 0
## 20 1 0 3 1 1 3 5 3 4 4 0
## 21 1 4 6 3 4 2 2 5 8 7 0
## 22 2 2 3 4 1 1 3 5 3 0 0
## 23 5 1 6 6 1 1 2 2 1 1 0
## 24 2 4 5 3 4 4 2 5 3 3 0
## 25 0 6 5 4 4 2 1 3 1 4 0
## 26 3 2 2 0 2 1 3 3 3 2 0
## 27 4 1 3 0 2 0 1 1 3 3 0
## 28 0 1 3 0 2 1 1 1 1 2 0
## 29 1 2 1 3 1 4 4 1 1 4 0
## 30 2 1 1 1 0 1 0 0 0 1 0
## 31 0 1 0 0 3 2 2 0 3 0 0
## 32 0 0 2 0 1 1 0 1 0 1 0
## 33 1 1 1 0 0 1 0 2 0 0 0
## 34 0 2 0 0 1 0 0 1 2 2 0
## 35 1 0 0 0 3 0 1 1 0 0 0
## 36 0 0 0 1 1 0 1 1 0 1 0
## 37 0 1 0 0 1 1 0 0 2 1 0
## 38 0 1 0 0 0 0 0 2 1 1 0
## 40 0 0 0 0 0 1 1 0 0 1 0
## 41 0 0 0 0 0 0 0 0 1 1 0
## 42 0 0 0 0 0 0 0 0 0 1 0
## 43 0 0 0 0 0 0 0 0 0 1 0
## 45 0 0 0 0 0 0 0 0 0 1 0
tabyl(atrisk$evermarried)
## atrisk$evermarried n percent
## 0 3245 0.6363993
## 1 1854 0.3636007
atrisk$censored<-Recode(as.factor(atrisk$evermarried),
recodes= "0 = 1; else=0")
x<-table(atrisk$ageevent,atrisk$evermarried)
x
##
## 0 1
## 15 186 0
## 16 186 5
## 17 223 11
## 18 229 42
## 19 206 70
## 20 115 84
## 21 128 136
## 22 127 121
## 23 120 131
## 24 128 139
## 25 121 162
## 26 104 135
## 27 117 116
## 28 130 108
## 29 89 117
## 30 109 88
## 31 95 68
## 32 99 61
## 33 89 57
## 34 61 46
## 35 63 29
## 36 51 31
## 37 46 27
## 38 49 15
## 39 50 7
## 40 42 16
## 41 35 9
## 42 42 8
## 43 27 4
## 44 27 4
## 45 26 4
## 46 32 3
## 47 33 0
## 48 29 0
## 49 31 0
#filter for complete cases
library(dplyr)
atrisk%>%
filter(complete.cases(ageevent, evermarried))
## # A tibble: 5,099 × 3,013
## caseid rscrninf rscrage rscrhisp rscrrace age_a age_r agescrn hisp hispgrp
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 80734 1 37 5 3 37 37 37 5 NA
## 2 80739 1 39 5 3 40 40 39 5 NA
## 3 80745 1 32 5 3 32 32 32 5 NA
## 4 80770 5 48 5 3 48 48 48 5 NA
## 5 80775 5 26 5 3 26 26 26 5 NA
## 6 80777 1 29 5 2 29 29 29 5 NA
## 7 80794 5 35 5 3 35 35 35 5 NA
## 8 80810 5 35 1 4 35 35 35 1 1
## 9 80825 1 28 5 3 27 27 27 5 NA
## 10 80837 1 41 5 3 42 42 42 5 NA
## # ℹ 5,089 more rows
## # ℹ 3,003 more variables: primlang1 <dbl>, primlang2 <dbl>, primlang3 <dbl>,
## # roscnt <dbl>, marstat <dbl>, fmarstat <dbl>, fmarit <dbl>, evrmarry <dbl>,
## # wplocale <dbl>, womrel <dbl>, goschol <dbl>, vaca <dbl>, higrade <dbl>,
## # compgrd <dbl>, dipged <dbl>, earnhs_y <dbl>, hischgrd <dbl>,
## # lstgrade <dbl>, myschol_y <dbl>, havedeg <dbl>, degrees <dbl>,
## # earnba_y <dbl>, expschl <dbl>, expgrade <dbl>, wthparnw <dbl>, …
#survival analysis of the event of first marriage
library(ggplot2)
library(survival)
library(ggsurvfit)
s1 <- survfit(Surv(ageevent, evermarried) ~ 1, data = atrisk)
str(s1)
## List of 16
## $ n : int 5099
## $ time : num [1:35] 15 16 17 18 19 20 21 22 23 24 ...
## $ n.risk : num [1:35] 5099 4913 4722 4488 4217 ...
## $ n.event : num [1:35] 0 5 11 42 70 84 136 121 131 139 ...
## $ n.censor : num [1:35] 186 186 223 229 206 115 128 127 120 128 ...
## $ surv : num [1:35] 1 0.999 0.997 0.987 0.971 ...
## $ std.err : num [1:35] 0 0.000455 0.000838 0.001675 0.002609 ...
## $ cumhaz : num [1:35] 0 0.00102 0.00335 0.01271 0.0293 ...
## $ std.chaz : num [1:35] 0 0.000455 0.000837 0.001669 0.002593 ...
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:35] 1 0.998 0.995 0.984 0.966 ...
## $ upper : num [1:35] 1 1 0.998 0.991 0.976 ...
## $ call : language survfit(formula = Surv(ageevent, evermarried) ~ 1, data = atrisk)
## - attr(*, "class")= chr "survfit"
survfit2(Surv(ageevent, evermarried) ~ 1, data = atrisk) %>%
ggsurvfit() +
labs(
x = "Age",
y = "Cumulative survival probabilitay"
) +
add_confidence_interval() +
add_risktable()
The curve begins to drop right before age 20 and continues decreasing
until 50.
#choose a focal variable: Some men have operations that make it impossible for them to father a child. Have you ever had a vasectomy or any other operation that makes it impossible for you to father a child? 1=YES, 5=NO
library(ggplot2)
library(survival)
library(ggsurvfit)
atrisk<-atrisk%>%
filter(everoper<8)
s2 <- survfit(Surv(ageevent, evermarried) ~ everoper, data = atrisk)
str(s2)
## List of 17
## $ n : int [1:2] 232 4864
## $ time : num [1:66] 17 18 19 20 21 22 23 24 25 26 ...
## $ n.risk : num [1:66] 232 231 226 216 201 181 166 144 127 107 ...
## $ n.event : num [1:66] 1 5 10 15 20 15 22 17 19 15 ...
## $ n.censor : num [1:66] 0 0 0 0 0 0 0 0 1 0 ...
## $ surv : num [1:66] 0.996 0.974 0.931 0.866 0.78 ...
## $ std.err : num [1:66] 0.00432 0.0107 0.01787 0.02578 0.03485 ...
## $ cumhaz : num [1:66] 0.00431 0.02596 0.0702 0.13965 0.23915 ...
## $ std.chaz : num [1:66] 0.00431 0.0106 0.01755 0.02509 0.03354 ...
## $ strata : Named int [1:2] 31 35
## ..- attr(*, "names")= chr [1:2] "everoper=1" "everoper=5"
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:66] 0.987 0.954 0.899 0.824 0.729 ...
## $ upper : num [1:66] 1 0.995 0.964 0.911 0.835 ...
## $ call : language survfit(formula = Surv(ageevent, evermarried) ~ everoper, data = atrisk)
## - attr(*, "class")= chr "survfit"
survfit2(Surv(ageevent, evermarried) ~ everoper, data = atrisk) %>%
ggsurvfit() +
labs(
x = "Age",
y = "Cumulative survival probabilitay"
) +
add_confidence_interval() +
add_risktable()
##This survival curve shows that men that have had a vasectomy tend to have a lower survival rate for first marriage than men who have not had a vasectomy. This is somewhat counter-intuitive because you would expect men that do not want to have children may also not want to get married. This can also be used to understand low fertility in the US, as many married men have had a vasectomy leaving them unable to bear children with their wives.
## If you look at the differences in sample sizes, you can see that men who have had a vasectomy are also is a much smaller group than the group of men who have not.
#create person-year file
library(survival)
pydata<-survSplit(atrisk, cut=c(17:50),end="ageevent",event="evermarried")
head(pydata[, c("ageevent", "evermarried", "caseid")], n=20)
## ageevent evermarried caseid
## 1 17 0 80734
## 2 18 0 80734
## 3 19 0 80734
## 4 20 0 80734
## 5 21 0 80734
## 6 22 0 80734
## 7 23 0 80734
## 8 24 0 80734
## 9 25 0 80734
## 10 26 0 80734
## 11 27 1 80734
## 12 17 0 80739
## 13 18 0 80739
## 14 19 0 80739
## 15 20 0 80739
## 16 21 0 80739
## 17 22 0 80739
## 18 23 0 80739
## 19 24 0 80739
## 20 25 0 80739
pydata$fmarry<- pydata$evermarried
table(pydata$ageevent,pydata$fmarry)
##
## 0 1
## 15 186 0
## 16 186 5
## 17 4708 11
## 18 4444 42
## 19 4145 70
## 20 3855 84
## 21 3604 136
## 22 3355 121
## 23 3097 131
## 24 2838 139
## 25 2549 162
## 26 2293 135
## 27 2073 116
## 28 1848 108
## 29 1601 117
## 30 1424 88
## 31 1247 68
## 32 1092 61
## 33 936 57
## 34 801 46
## 35 711 29
## 36 617 31
## 37 539 27
## 38 478 15
## 39 422 7
## 40 356 16
## 41 305 9
## 42 262 8
## 43 216 4
## 44 185 4
## 45 154 4
## 46 125 3
## 47 93 0
## 48 60 0
## 49 31 0
#Here is use the KM method to generate conditional probabilities of an event
ageprobs<- pydata %>%
group_by(ageevent, everoper) %>%
summarize(q=mean(fmarry,na.rm = TRUE),n=n())
## `summarise()` has grouped output by 'ageevent'. You can override using the
## `.groups` argument.
table(ageprobs$ageevent,ageprobs$q)
##
## 0 0.00222866057499443 0.00431034482758621 0.00869565217391304
## 15 1 0 0 0
## 16 0 0 0 0
## 17 0 1 1 0
## 18 0 0 0 1
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 0 0 0 0
## 26 0 0 0 0
## 27 0 0 0 0
## 28 0 0 0 0
## 29 0 0 0 0
## 30 0 0 0 0
## 31 0 0 0 0
## 32 0 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 0
## 40 0 0 0 0
## 41 1 0 0 0
## 42 0 0 0 0
## 43 0 0 0 0
## 44 1 0 0 0
## 45 1 0 0 0
## 46 1 0 0 0
## 47 2 0 0 0
## 48 2 0 0 0
## 49 1 0 0 0
##
## 0.0142857142857143 0.0144927536231884 0.0150413637503134
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 1
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 1 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 1 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0185334407735697 0.0216450216450216 0.0222222222222222
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 1 0
## 19 0 0 0
## 20 1 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 1
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0247933884297521 0.0261780104712042 0.0266666666666667
## 15 0 0 0
## 16 0 1 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 1
## 46 1 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0271317829457364 0.0293501048218029 0.0298013245033113
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 1 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 1
## 42 1 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0321699544764795 0.0327776207968353 0.0336134453781513
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 1 0
## 22 1 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 1
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0355976485956891 0.0391061452513966 0.0430638898693964
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 1 0 0
## 24 0 0 1
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 1 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0432632880098888 0.0442477876106195 0.0462519936204147
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 1 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 1 0 0
## 35 0 0 0
## 36 0 0 1
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0473588342440801 0.0495946590367191 0.0507131537242472
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 1 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 1
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 1 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0515837104072398 0.05170185264972 0.0533049040511727 0.0551724137931034
## 15 0 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 0 0 0 0
## 26 0 1 0 0
## 27 0 0 0 0
## 28 0 0 1 0
## 29 0 0 0 0
## 30 0 0 0 1
## 31 0 0 0 0
## 32 1 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 0
## 40 0 0 0 0
## 41 0 0 0 0
## 42 0 0 0 0
## 43 0 0 0 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
##
## 0.0553405572755418 0.0568421052631579 0.0588235294117647 0.0625
## 15 0 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 1 0 0 0
## 26 0 0 0 0
## 27 0 0 0 0
## 28 0 0 0 0
## 29 0 0 0 0
## 30 0 0 0 0
## 31 0 0 0 0
## 32 0 0 0 0
## 33 0 1 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 1 0
## 38 0 0 0 1
## 39 0 0 0 0
## 40 0 0 0 0
## 41 0 0 0 0
## 42 0 0 0 0
## 43 0 0 0 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
##
## 0.0655737704918033 0.0666666666666667 0.0694444444444444
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 1
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 1 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 1 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0697674418604651 0.0754716981132075 0.0828729281767956
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 1
## 23 0 0 0
## 24 0 0 0
## 25 0 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 1 0
## 32 0 0 0
## 33 1 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
##
## 0.0833333333333333 0.0952380952380952 0.0995024875621891 0.1
## 15 0 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 1 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 0 0 0 0
## 25 0 0 0 0
## 26 0 0 0 0
## 27 0 0 0 0
## 28 0 0 0 1
## 29 0 0 0 0
## 30 0 0 0 0
## 31 0 0 0 0
## 32 1 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 1 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 0
## 40 0 0 0 0
## 41 0 0 0 0
## 42 1 0 0 0
## 43 0 0 0 1
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
##
## 0.118055555555556 0.126760563380282 0.129032258064516 0.130434782608696
## 15 0 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 0 0 0 0
## 24 1 0 0 0
## 25 0 0 0 0
## 26 0 0 0 0
## 27 0 0 0 1
## 28 0 0 0 0
## 29 0 1 0 0
## 30 0 0 1 0
## 31 0 0 0 0
## 32 0 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 0
## 40 0 0 0 0
## 41 0 0 0 0
## 42 0 0 0 0
## 43 0 0 0 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
##
## 0.132530120481928 0.14018691588785 0.142857142857143 0.149606299212598
## 15 0 0 0 0
## 16 0 0 0 0
## 17 0 0 0 0
## 18 0 0 0 0
## 19 0 0 0 0
## 20 0 0 0 0
## 21 0 0 0 0
## 22 0 0 0 0
## 23 1 0 0 0
## 24 0 0 0 0
## 25 0 0 0 1
## 26 0 1 0 0
## 27 0 0 0 0
## 28 0 0 0 0
## 29 0 0 0 0
## 30 0 0 0 0
## 31 0 0 0 0
## 32 0 0 0 0
## 33 0 0 0 0
## 34 0 0 0 0
## 35 0 0 0 0
## 36 0 0 0 0
## 37 0 0 0 0
## 38 0 0 0 0
## 39 0 0 0 0
## 40 0 0 1 0
## 41 0 0 0 0
## 42 0 0 0 0
## 43 0 0 0 0
## 44 0 0 0 0
## 45 0 0 0 0
## 46 0 0 0 0
## 47 0 0 0 0
## 48 0 0 0 0
## 49 0 0 0 0
##
## 0.192307692307692 0.289473684210526
## 15 0 0
## 16 0 0
## 17 0 0
## 18 0 0
## 19 0 0
## 20 0 0
## 21 0 0
## 22 0 0
## 23 0 0
## 24 0 0
## 25 0 0
## 26 0 0
## 27 0 0
## 28 0 0
## 29 0 0
## 30 0 0
## 31 0 0
## 32 0 0
## 33 0 0
## 34 0 1
## 35 1 0
## 36 0 0
## 37 0 0
## 38 0 0
## 39 0 0
## 40 0 0
## 41 0 0
## 42 0 0
## 43 0 0
## 44 0 0
## 45 0 0
## 46 0 0
## 47 0 0
## 48 0 0
## 49 0 0
#next I provide a graphic of first marriage rates by age grouped by whether or not the respondent has had a vasectomy or not
#reminder 1= has had a vasectomy and 5= has not had a vasectomy
library(car)
ageprobs$operate<-recode(ageprobs$everoper,"1=1, 5=0")
## Warning: Unreplaced values treated as NA as `.x` is not compatible.
## Please specify replacements exhaustively or supply `.default`.
library(dplyr)
ageprobs$oper<- as.numeric(ageprobs$operate)
## Warning: NAs introduced by coercion
ageprobs$numq <- ageprobs$q*(1-ageprobs$q)
ageprobs$seq <- sqrt(ageprobs$numq/ageprobs$n)
ageprobs$meq <- 1.96*ageprobs$seq
ageprobs$logq <- log(ageprobs$q)
ggplot(data =ageprobs, aes(x = ageevent, y = q, ymin=q-meq, ymax=q+meq, fill=everoper,color=oper, group = everoper)) +
geom_line() + ylim(0,.15) + geom_ribbon(alpha=0.3,aes(color=NULL)) +
labs(title="First Marriage Rates by Age",
subtitle="US-Born Participants",
caption="Source: NSFG Male File 2017-2019") +
xlab(label="Age at Risk") +
ylab(label="First Marriage Rate")
#to more clearly show the differences I provide a graph logged by age
library(ggplot2)
ggplot(data =ageprobs, aes(x = ageevent, y = logq, group = everoper, color=everoper, fill=everoper)) +
geom_line() +
labs(title="First Marriage Rates (Logged) by Age",
subtitle="US-Born Participants",
caption="Source: NSFG Male File 2017-2019") +
xlab(label="Age at Risk") +
ylab(label="First Marriage Rate (Logged")
You can see that the first marriage rate for men who have had a
vasectomy is still lower than men who have not with these graphics. This
follows the original intuition that I mentioned but differs from the KM
survival curve.