library(haven)
library(foreign) 
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(readr) 
library(MASS) 
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(car) 
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(alr3)
## 
## Attaching package: 'alr3'
## The following object is masked from 'package:MASS':
## 
##     forbes
library(zoo)
library(nortest)
library(plotrix)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:plotrix':
## 
##     rescale
## The following object is masked from 'package:readr':
## 
##     col_factor
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
## 
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
## 
##     calibrate
## The following object is masked from 'package:lmtest':
## 
##     lrtest
## The following object is masked from 'package:car':
## 
##     logit
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(sandwich)
library(pastecs)
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(survey)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
## 
## Attaching package: 'eha'
## The following objects are masked from 'package:VGAM':
## 
##     dgompertz, dmakeham, pgompertz, pmakeham, qgompertz, qmakeham,
##     rgompertz, rmakeham
library(reshape2)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:pastecs':
## 
##     first, last
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:pastecs':
## 
##     extract
nlsy97mig<-read.csv("C:\\Users\\Jaire\\OneDrive\\Desktop\\Research\\Data\\nlsy97mig.csv")

A. Transformations:

Outcome:

# Age at interview

nlsy97mig$age1<-ifelse(nlsy97mig$ragem06==-5, NA,nlsy97mig$ragem06/12)

nlsy97mig$age2<-ifelse(nlsy97mig$ragem07==-5, NA,nlsy97mig$ragem07/12)

nlsy97mig$age3<-ifelse(nlsy97mig$ragem08==-5, NA,nlsy97mig$ragem08/12)

nlsy97mig$age4<-ifelse(nlsy97mig$ragem09==-5, NA,nlsy97mig$ragem09/12)

# Migration since last interview
nlsy97mig$mig1<-ifelse(nlsy97mig$remp07==-4, NA,nlsy97mig$rmig07)

nlsy97mig$mig2<-ifelse(nlsy97mig$rmig08==-4, NA,nlsy97mig$rmig08)

nlsy97mig$mig3<-ifelse(nlsy97mig$rmig09==-4, NA,nlsy97mig$rmig09)

nlsy97mig$mig1<-ifelse(nlsy97mig$rmig07==-5, NA,nlsy97mig$rmig07)

nlsy97mig$mig2<-ifelse(nlsy97mig$rmig08==-5, NA,nlsy97mig$rmig08)

nlsy97mig$mig3<-ifelse(nlsy97mig$rmig09==-5, NA,nlsy97mig$rmig09)

#Unemployed at Start of Year
nlsy97mig$unem1<-ifelse(nlsy97mig$remp06==-4, NA,nlsy97mig$remp06)

nlsy97mig$unem2<-ifelse(nlsy97mig$remp07==-4, NA,nlsy97mig$remp07)

nlsy97mig$unem3<-ifelse(nlsy97mig$remp08==-4, NA,nlsy97mig$remp08)

nlsy97mig$unem4<-ifelse(nlsy97mig$remp09==-4, NA,nlsy97mig$remp09)

nlsy97mig$unem1<-recode(nlsy97mig$unem1, recodes = "0:3=0; 4:5=1; 6:201707=0")
nlsy97mig$unem2<-recode(nlsy97mig$unem2, recodes = "0:3=0; 4:5=1; 6:201707=0")
nlsy97mig$unem3<-recode(nlsy97mig$unem3, recodes = "0:3=0; 4:5=1; 6:201709=0")
nlsy97mig$unem4<-recode(nlsy97mig$unem4, recodes = "0:3=0; 4:5=1; 6:201709=0")
table(nlsy97mig$unem2)
## 
##    0    1 
## 6534 1840

Predictors:

# Sex
nlsy97mig$male<-recode(nlsy97mig$rsex, recodes = "1=1; 2=0")

# Race
nlsy97mig$nonwhite<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;2:5=1;1=0")
nlsy97mig$white<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;2:5=0")
nlsy97mig$black<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1=0;2=1;3:5=0")
nlsy97mig$Asian<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:3=0;4=1;5=0")
nlsy97mig$AmerIn<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:2=0;3=1;4:5=0")
nlsy97mig$other<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:4=0;5=1")



# Hispanic
nlsy97mig$hisp<-recode(nlsy97mig$reth, recodes = "1=0; 2=1; 3:4=0")

# Highest Level of Education 4-year degree or higher
nlsy97mig$none<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1=1;2:8=0")
nlsy97mig$GED<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1=0;2=1;3:8=1")
nlsy97mig$HSdip<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:2=0;3=1;4:8=1")
nlsy97mig$assoc<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:3=0;4=1;5:8=1")
nlsy97mig$bach<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:4=0;5=1;6:8=1")
nlsy97mig$mastr<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:5=0;6=1;7:8=1")
nlsy97mig$Doct<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:6=0;7:8=1")
nlsy97mig$college<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;-1=NA;1:3=0;4:8=1")
nlsy97mig$nocollege<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:3=1;4:8=0")


# Immigration Status
nlsy97mig$natbrncit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1=1;2:6=0")
nlsy97mig$naturcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1=0;2=1;3:6=0")
nlsy97mig$appnaturcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:2=0;3=1;4:6=0")
nlsy97mig$permrescit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:3=0;4=1;5:6=0")
nlsy97mig$apprescit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:4=0;5=1;6=0")
nlsy97mig$othrcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:5=0;6=1")
nlsy97mig$noncit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:2=0;3:6=1")
nlsy97mig$immigrant<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;6=NA;1=0;2:5=1")

#interaction college race
nlsy97mig$nonwhtcol<-c(nlsy97mig$nonwhite*nlsy97mig$college)

#occupation 
nlsy97mig$rjob1<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA; 10:430=1;500:9990=0")
nlsy97mig$rjob2<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;500 : 950=1;10:9990=0 ")
nlsy97mig$rjob3<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1000 : 1240=1;10:9990=0")
nlsy97mig$rjob4<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1300 : 1530=1;10:9990=0")
nlsy97mig$rjob5<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1540 : 1560=1;10:9990=0")
nlsy97mig$rjob6<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1600 : 1760=1;10:9990=0")
nlsy97mig$rjob7<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1800 : 1860=1;10:9990=0")
nlsy97mig$rjob8<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1900 : 1960=1;10:9990=0")
nlsy97mig$rjob9<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2000 : 2060=1;10:9990=0")
nlsy97mig$rjob10<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2100 : 2150=1;10:9990=0")
nlsy97mig$rjob11<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2200 : 2340=1;10:9990=0")
nlsy97mig$rjob12<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2400 : 2550=1;10:9990=0")
nlsy97mig$rjob13<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2600 : 2760=1;10:9990=0")
nlsy97mig$rjob14<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2800 : 2960=1;10:9990=0")
nlsy97mig$rjob15<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3000 : 3260=1;10:9990=0")
nlsy97mig$rjob16<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3300 : 3650=1;10:9990=0")
nlsy97mig$rjob17<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3700 : 3950=1;10:9990=0")
nlsy97mig$rjob18<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4000 : 4160=1;10:9990=0")
nlsy97mig$rjob19<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4200 : 4250=1;10:9990=0")
nlsy97mig$rjob20<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4300 : 4430=1;10:9990=0")
nlsy97mig$rjob21<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4460=1;10:9990=0")
nlsy97mig$rjob22<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4500 : 4650=1;10:9990=0")
nlsy97mig$rjob23<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4700 : 4960=1;10:9990=0")
nlsy97mig$rjob24<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;5000 : 5930=1;10:9990=0")
nlsy97mig$rjob25<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;6000 : 6130=1;10:9990=0")
nlsy97mig$rjob26<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;6200 : 6940=1;10:9990=0")
nlsy97mig$rjob27<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7000 : 7620=1;10:9990=0")
nlsy97mig$rjob28<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7700 : 7750=1;10:9990=0")
nlsy97mig$rjob29<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7800 : 7850=1;10:9990=0")
nlsy97mig$rjob30<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7900 : 8960=1;10:9990=0")
nlsy97mig$rjob31<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9000 : 9750=1;10:9990=0")
nlsy97mig$rjob32<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9800 : 9840=1;10:9990=0")
nlsy97mig$rjob33<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9950 : 9990=1;10:9840=0")


#region
nlsy97mig$northeast<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 2:4=0")
nlsy97mig$northcent<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 2=1;1=0;3:4=0")
nlsy97mig$south<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA;1:2=0;3=1;4=0")
nlsy97mig$west<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 1:3=0;4=1")

#infrastructure 
nlsy97mig$urban<-recode(nlsy97mig$rurbru06, recodes = "-5=NA;-4=NA;-3=NA;2=NA;")
nlsy97mig$rural<-recode(nlsy97mig$rurbru06, recodes = "-5=NA;-4=NA;-3=NA;2=NA;1=0;0=1")
table(nlsy97mig$south)
## 
##    0    1 
## 4513 3002

1 10 TO 430 EXECUTIVE, ADMINISTRATIVE AND MANAGERIAL 2 500 : 950: MANAGEMENT RELATED 3 1000 : 1240: MATHEMATICAL AND COMPUTER SCIENTISTS 4 1300 : 1530: ENGINEERS, ARCHITECTS, AND SURVEYORS 5 1540 : 1560: ENGINEERING AND RELATED TECHNICIANS 6 1600 : 1760: PHYSICAL SCIENTISTS 7 1800 : 1860: SOCIAL SCIENTISTS AND RELATED WORKERS 8 1900 : 1960: LIFE, PHYSICAL, AND SOCIAL SCIENCE TECHNICIANS 9 2000 : 2060: COUNSELORS, SOCIAL, AND RELIGIOUS WORKERS 10 2100 : 2150: LAWYERS, JUDGES, AND LEGAL SUPPORT WORKERS 11 2200 : 2340: TEACHERS 12 2400 : 2550: EDUCATION, TRAINING, AND LIBRARY WORKERS 13 2600 : 2760: ENTERTAINERS AND PERFORMERS, SPORTS AND RELATED WORKERS 14 2800 : 2960: MEDIA AND COMMUNICATION WORKERS 15 3000 : 3260: HEALTH DIAGNOSIS AND TREATING PRACTITIONERS 16 3300 : 3650: HEALTH CARE TECHNICAL AND SUPPORT 17 3700 : 3950: PROTECTIVE SERVICE 18 4000 : 4160: FOOD PREPARATIONS AND SERVING RELATED 19 4200 : 4250: CLEANING AND BUILDING SERVICE 20 4300 : 4430: ENTERTAINMENT ATTENDANTS AND RELATED WORKERS 21 4460 FUNERAL RELATED OCCUPATIONS 22 4500 : 4650: PERSONAL CARE AND SERVICE WORKERS 23 4700 : 4960: SALES AND RELATED WORKERS 24 5000 : 5930: OFFICE AND ADMINISTRATIVE SUPPORT WORKERS 25 6000 : 6130: FARMING, FISHING, AND FORESTRY 26 6200 : 6940: CONSTRUCTION TRADES AND EXTRACTION WORKERS 27 7000 : 7620: INSTALLATION, MAINTENANCE, AND REPAIR WORKERS 28 7700 : 7750: PRODUCTION AND OPERATING WORKERS 29 7800 : 7850: FOOD PREPARATION 30 7900 : 8960: SETTER, OPERATORS, AND TENDERS 31 9000 : 9750: TRANSPORTATION AND MATERIAL MOVING WORKERS 32 9800 : 9840: MILITARY SPECIFIC OCCUPATIONS 33 9950 : 9990: ACS SPECIAL CODES

Censoring and Reshaping of Data:

#subset censoring
nlsy97mig<-subset(nlsy97mig, is.na(unem2)==F&is.na(unem3)==F&
                   is.na(unem4)==F&is.na(age2)==F&
                   is.na(age3)==F&is.na(age4)==F&unem2!=1)
# Reshape

e.long<-reshape(data.frame(nlsy97mig), idvar = "rid",
                varying = list(c("age2", "age3"),
                  c("age3", "age4")),
                v.names = c("age_enter", "age_exit"),
                times = 1:2, direction = "long")
e.long<-e.long[order(e.long$rid, e.long$time),]

e.long$unemtran<-NA

e.long$unemtran[e.long$unem2==0&e.long$unem3==1&e.long$time==1]<-1
e.long$unemtran[e.long$unem3==0&e.long$unem4==1&e.long$time==2]<-1

e.long$unemtran[e.long$unem2==0&e.long$unem3==0&e.long$time==1]<-0
e.long$unemtran[e.long$unem3==0&e.long$unem4==0&e.long$time==2]<-0

#Censor failures 
failed1<-which(is.na(e.long$unemtran)==T)
e.long<-e.long[-failed1,]

e.long$age1r<-round(e.long$age_enter,0)
e.long$age4r<-round(e.long$age_exit,0)
e.long$time_start<-e.long$time-1

head(e.long, n=10)
##     rid    wht psu strat rsex reth rrace rbirthyr rregn06 rurbru06 rjob06
## 1.1   1 323197   1    35    2    4     1     1981       1        1   4760
## 1.2   1 323197   1    35    2    4     1     1981       1        1   4760
## 4.1   4 149099   2    35    2    2     5     1981       1        1   5240
## 4.2   4 149099   2    35    2    2     5     1981       1        1   5240
## 5.1   5 245150   1    35    1    2     1     1982       1        1   3740
## 5.2   5 245150   1    35    1    2     1     1982       1        1   3740
## 6.1   6 218371   1    35    2    2     5     1982       1        1   2540
## 6.2   6 218371   1    35    2    2     5     1982       1        1   2540
## 9.1   9 313027   1    35    1    4     1     1982       1        1   3920
## 9.2   9 313027   1    35    1    4     1     1982       1        1   3920
##     rcit06 reduc06 rmig07 rmig08 rmig09 ragem06 ragem07 ragem08 ragem09 remp06
## 1.1      2       5     -4     -4     -4     302     313     325     337 200103
## 1.2      2       5     -4     -4     -4     302     313     325     337 200103
## 4.1      1       3      1     -4      1     309     320     336     344 200502
## 4.2      1       3      1     -4      1     309     320     336     344 200502
## 5.1      1       3     -4     -4      0     289     300     312     323 200601
## 5.2      1       3     -4     -4      0     289     300     312     323 200601
## 6.1      1       2     -4     -4     -4     302     310     323     334 200402
## 6.2      1       2     -4     -4     -4     302     310     323     334 200402
## 9.1      1       5     -4     -4     -4     289     301     314     325      5
## 9.2      1       5     -4     -4     -4     289     301     314     325      5
##     remp07 remp08 remp09     age1 mig1 mig2 mig3 unem1 unem2 unem3 unem4 male
## 1.1 200702 200702 200702 25.16667   -4   -4   -4     0     0     0     0    0
## 1.2 200702 200702 200702 25.16667   -4   -4   -4     0     0     0     0    0
## 4.1 200502 200502 200802 25.75000    1   -4    1     0     0     0     0    0
## 4.2 200502 200502 200802 25.75000    1   -4    1     0     0     0     0    0
## 5.1 200602 200602 200602 24.08333   -4   -4    0     0     0     0     0    1
## 5.2 200602 200602 200602 24.08333   -4   -4    0     0     0     0     0    1
## 6.1 200402 200402 200402 25.16667   -4   -4   -4     0     0     0     0    0
## 6.2 200402 200402 200402 25.16667   -4   -4   -4     0     0     0     0    0
## 9.1 200601 200601 200601 24.08333   -4   -4   -4     1     0     0     0    1
## 9.2 200601 200601 200601 24.08333   -4   -4   -4     1     0     0     0    1
##     nonwhite white black Asian AmerIn other hisp none GED HSdip assoc bach
## 1.1        0     1     0     0      0     0    0    0   1     1     1    1
## 1.2        0     1     0     0      0     0    0    0   1     1     1    1
## 4.1        1     0     0     0      0     1    1    0   1     1     0    0
## 4.2        1     0     0     0      0     1    1    0   1     1     0    0
## 5.1        0     1     0     0      0     0    1    0   1     1     0    0
## 5.2        0     1     0     0      0     0    1    0   1     1     0    0
## 6.1        1     0     0     0      0     1    1    0   1     0     0    0
## 6.2        1     0     0     0      0     1    1    0   1     0     0    0
## 9.1        0     1     0     0      0     0    0    0   1     1     1    1
## 9.2        0     1     0     0      0     0    0    0   1     1     1    1
##     mastr Doct college nocollege natbrncit naturcit appnaturcit permrescit
## 1.1     0    0       1         0         0        1           0          0
## 1.2     0    0       1         0         0        1           0          0
## 4.1     0    0       0         1         1        0           0          0
## 4.2     0    0       0         1         1        0           0          0
## 5.1     0    0       0         1         1        0           0          0
## 5.2     0    0       0         1         1        0           0          0
## 6.1     0    0       0         1         1        0           0          0
## 6.2     0    0       0         1         1        0           0          0
## 9.1     0    0       1         0         1        0           0          0
## 9.2     0    0       1         0         1        0           0          0
##     apprescit othrcit noncit immigrant nonwhtcol rjob1 rjob2 rjob3 rjob4 rjob5
## 1.1         0       0      0         1         0     0     0     0     0     0
## 1.2         0       0      0         1         0     0     0     0     0     0
## 4.1         0       0      0         0         0     0     0     0     0     0
## 4.2         0       0      0         0         0     0     0     0     0     0
## 5.1         0       0      0         0         0     0     0     0     0     0
## 5.2         0       0      0         0         0     0     0     0     0     0
## 6.1         0       0      0         0         0     0     0     0     0     0
## 6.2         0       0      0         0         0     0     0     0     0     0
## 9.1         0       0      0         0         0     0     0     0     0     0
## 9.2         0       0      0         0         0     0     0     0     0     0
##     rjob6 rjob7 rjob8 rjob9 rjob10 rjob11 rjob12 rjob13 rjob14 rjob15 rjob16
## 1.1     0     0     0     0      0      0      0      0      0      0      0
## 1.2     0     0     0     0      0      0      0      0      0      0      0
## 4.1     0     0     0     0      0      0      0      0      0      0      0
## 4.2     0     0     0     0      0      0      0      0      0      0      0
## 5.1     0     0     0     0      0      0      0      0      0      0      0
## 5.2     0     0     0     0      0      0      0      0      0      0      0
## 6.1     0     0     0     0      0      0      1      0      0      0      0
## 6.2     0     0     0     0      0      0      1      0      0      0      0
## 9.1     0     0     0     0      0      0      0      0      0      0      0
## 9.2     0     0     0     0      0      0      0      0      0      0      0
##     rjob17 rjob18 rjob19 rjob20 rjob21 rjob22 rjob23 rjob24 rjob25 rjob26
## 1.1      0      0      0      0      0      0      1      0      0      0
## 1.2      0      0      0      0      0      0      1      0      0      0
## 4.1      0      0      0      0      0      0      0      1      0      0
## 4.2      0      0      0      0      0      0      0      1      0      0
## 5.1      1      0      0      0      0      0      0      0      0      0
## 5.2      1      0      0      0      0      0      0      0      0      0
## 6.1      0      0      0      0      0      0      0      0      0      0
## 6.2      0      0      0      0      0      0      0      0      0      0
## 9.1      1      0      0      0      0      0      0      0      0      0
## 9.2      1      0      0      0      0      0      0      0      0      0
##     rjob27 rjob28 rjob29 rjob30 rjob31 rjob32 rjob33 northeast northcent south
## 1.1      0      0      0      0      0      0      0         1         0     0
## 1.2      0      0      0      0      0      0      0         1         0     0
## 4.1      0      0      0      0      0      0      0         1         0     0
## 4.2      0      0      0      0      0      0      0         1         0     0
## 5.1      0      0      0      0      0      0      0         1         0     0
## 5.2      0      0      0      0      0      0      0         1         0     0
## 6.1      0      0      0      0      0      0      0         1         0     0
## 6.2      0      0      0      0      0      0      0         1         0     0
## 9.1      0      0      0      0      0      0      0         1         0     0
## 9.2      0      0      0      0      0      0      0         1         0     0
##     west urban rural time age_enter age_exit unemtran age1r age4r time_start
## 1.1    0     1     0    1  26.08333 27.08333        0    26    27          0
## 1.2    0     1     0    2  27.08333 28.08333        0    27    28          1
## 4.1    0     1     0    1  26.66667 28.00000        0    27    28          0
## 4.2    0     1     0    2  28.00000 28.66667        0    28    29          1
## 5.1    0     1     0    1  25.00000 26.00000        0    25    26          0
## 5.2    0     1     0    2  26.00000 26.91667        0    26    27          1
## 6.1    0     1     0    1  25.83333 26.91667        0    26    27          0
## 6.2    0     1     0    2  26.91667 27.83333        0    27    28          1
## 9.1    0     1     0    1  25.08333 26.16667        0    25    26          0
## 9.2    0     1     0    2  26.16667 27.08333        0    26    27          1

Descriptive Analysis of Rates:

e.long%>%
  group_by(time)%>%
  summarise(prop_event= mean(unemtran, na.rm=T))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
##    time prop_event
##   <int>      <dbl>
## 1     1      0.115
## 2     2      0.117
e.long%>%
  filter(complete.cases(time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west))%>%
  group_by(time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west)%>%
  summarise(prop_event= mean(unemtran, na.rm=T))
## `summarise()` regrouping output by 'time', 'white', 'hisp', 'Asian', 'AmerIn', 'other', 'nonwhite', 'nocollege', 'rjob30', 'rural', 'black', 'northeast', 'northcent', 'south' (override with `.groups` argument)
## # A tibble: 280 x 16
## # Groups:   time, white, hisp, Asian, AmerIn, other, nonwhite, nocollege,
## #   rjob30, rural, black, northeast, northcent, south [280]
##     time white  hisp Asian AmerIn other nonwhite nocollege rjob30 rural black
##    <int> <dbl> <dbl> <dbl>  <dbl> <dbl>    <dbl>     <dbl>  <dbl> <dbl> <dbl>
##  1     1     0     0     0      0     0        1         0      0     0     1
##  2     1     0     0     0      0     0        1         0      0     0     1
##  3     1     0     0     0      0     0        1         0      0     0     1
##  4     1     0     0     0      0     0        1         0      0     0     1
##  5     1     0     0     0      0     0        1         0      0     1     1
##  6     1     0     0     0      0     0        1         0      0     1     1
##  7     1     0     0     0      0     0        1         0      1     0     1
##  8     1     0     0     0      0     0        1         0      1     0     1
##  9     1     0     0     0      0     0        1         1      0     0     1
## 10     1     0     0     0      0     0        1         1      0     0     1
## # ... with 270 more rows, and 5 more variables: northeast <dbl>,
## #   northcent <dbl>, south <dbl>, west <dbl>, prop_event <dbl>

Full Survey Design:

options("survey.adjust.domain.lonely"=T)
options("survey.lonely.psu"='adjust')
options(survey.lonely.psu = "adjust")
e.long<-e.long%>%
  filter(complete.cases(psu,time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west))

des<-svydesign(nest = TRUE,ids =~psu,
               strata =~strat, weights =~wht,
               data = e.long)
svymean(~strat, design=des)
##         mean     SE
## strat 39.413 0.6537

Analysis:

# Intercept
fit1<-svyglm(unemtran~as.factor(time_start)-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Black race
fit2<-svyglm(unemtran~as.factor(time_start)+black-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Other races
fit3<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# highest level of education
fit4<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# High/low risk occupations
fit5<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college+rjob30-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Race education interaction
fit6<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college+rjob30+west+northeast+south-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fit7<-svyglm(unemtran~as.factor(time_start)+black*college+Asian+white+hisp+male+rjob30+west+ northeast+south-1,
              design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit1)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) - 1, design = des, 
##     family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -2.09427    0.05391  -38.85   <2e-16 ***
## as.factor(time_start)1 -2.07461    0.04515  -45.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.00011)
## 
## Number of Fisher Scoring iterations: 4
summary(fit2)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black - 1, 
##     design = des, family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -2.15563    0.05592 -38.550  < 2e-16 ***
## as.factor(time_start)1 -2.13427    0.04828 -44.203  < 2e-16 ***
## black                   0.35393    0.07923   4.467 1.88e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1.000142)
## 
## Number of Fisher Scoring iterations: 4
summary(fit3)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian + 
##     white + hisp + male - 1, design = des, family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -2.19361    0.13513 -16.233  < 2e-16 ***
## as.factor(time_start)1 -2.16830    0.13548 -16.004  < 2e-16 ***
## black                   0.52054    0.14716   3.537 0.000593 ***
## Asian                   0.07289    0.33417   0.218 0.827733    
## white                   0.17913    0.12717   1.409 0.161779    
## hisp                    0.17385    0.11299   1.539 0.126752    
## male                   -0.30020    0.07492  -4.007 0.000112 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9994058)
## 
## Number of Fisher Scoring iterations: 4
summary(fit4)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian + 
##     white + hisp + male + college - 1, design = des, family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -1.95192    0.13549 -14.407  < 2e-16 ***
## as.factor(time_start)1 -1.91834    0.13843 -13.858  < 2e-16 ***
## black                   0.42905    0.14272   3.006  0.00328 ** 
## Asian                   0.18541    0.34936   0.531  0.59669    
## white                   0.18035    0.12303   1.466  0.14555    
## hisp                    0.06944    0.11505   0.604  0.54736    
## male                   -0.36975    0.07805  -4.737 6.57e-06 ***
## college                -0.64950    0.09880  -6.574 1.75e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9993826)
## 
## Number of Fisher Scoring iterations: 5
summary(fit5)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian + 
##     white + hisp + male + college + rjob30 - 1, design = des, 
##     family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -1.97966    0.13689 -14.462  < 2e-16 ***
## as.factor(time_start)1 -1.94434    0.13916 -13.972  < 2e-16 ***
## black                   0.43422    0.14320   3.032  0.00304 ** 
## Asian                   0.19404    0.34794   0.558  0.57822    
## white                   0.18184    0.12305   1.478  0.14237    
## hisp                    0.07889    0.11579   0.681  0.49712    
## male                   -0.38351    0.07836  -4.894 3.47e-06 ***
## college                -0.62388    0.09810  -6.360 4.98e-09 ***
## rjob30                  0.48497    0.16099   3.012  0.00323 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9998205)
## 
## Number of Fisher Scoring iterations: 5
summary(fit6)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian + 
##     white + hisp + male + college + rjob30 + west + northeast + 
##     south - 1, design = des, family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -2.00884    0.15353 -13.085  < 2e-16 ***
## as.factor(time_start)1 -1.97340    0.15745 -12.534  < 2e-16 ***
## black                   0.45163    0.14833   3.045  0.00294 ** 
## Asian                   0.17518    0.34688   0.505  0.61461    
## white                   0.19387    0.12620   1.536  0.12749    
## hisp                    0.05849    0.11749   0.498  0.61965    
## male                   -0.38277    0.07827  -4.890 3.64e-06 ***
## college                -0.62288    0.09885  -6.301 7.05e-09 ***
## rjob30                  0.49058    0.16131   3.041  0.00298 ** 
## west                    0.10851    0.13043   0.832  0.40734    
## northeast              -0.07642    0.10817  -0.706  0.48145    
## south                   0.02173    0.09057   0.240  0.81084    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9990917)
## 
## Number of Fisher Scoring iterations: 5
summary(fit7)
## 
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black * college + 
##     Asian + white + hisp + male + rjob30 + west + northeast + 
##     south - 1, design = des, family = binomial(link = "logit"))
## 
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht, 
##     data = e.long)
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## as.factor(time_start)0 -2.03357    0.15330 -13.265  < 2e-16 ***
## as.factor(time_start)1 -1.99710    0.15790 -12.648  < 2e-16 ***
## black                   0.54921    0.14794   3.712 0.000332 ***
## college                -0.54111    0.10999  -4.919 3.26e-06 ***
## Asian                   0.16216    0.34503   0.470 0.639346    
## white                   0.19371    0.12657   1.530 0.128937    
## hisp                    0.06940    0.11686   0.594 0.553896    
## male                   -0.38496    0.07788  -4.943 2.96e-06 ***
## rjob30                  0.49703    0.16085   3.090 0.002567 ** 
## west                    0.11036    0.13016   0.848 0.398469    
## northeast              -0.07584    0.10806  -0.702 0.484368    
## south                   0.02680    0.09005   0.298 0.766589    
## black:college          -0.68555    0.22653  -3.026 0.003120 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9973726)
## 
## Number of Fisher Scoring iterations: 5

Model Fit Comparison:

AIC1<-AIC(fit1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(fit2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3<-AIC(fit3)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC4<-AIC(fit4)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC5<-AIC(fit5)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC6<-AIC(fit6)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC7<-AIC(fit7)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
print(AIC1)
##      eff.p        AIC   deltabar 
##    2.22834 6322.01749    1.11417
print(AIC2)
##       eff.p         AIC    deltabar 
##    3.105504 6307.465890    1.035168
print(AIC3)
##       eff.p         AIC    deltabar 
##    8.112278 6295.210395    1.158897
print(AIC4)
##       eff.p         AIC    deltabar 
##    9.812926 6228.181162    1.226616
print(AIC5)
##       eff.p         AIC    deltabar 
##   11.025625 6220.680351    1.225069
print(AIC6)
##      eff.p        AIC   deltabar 
##   14.74524 6225.34180    1.22877
print(AIC7)
##       eff.p         AIC    deltabar 
##   15.381496 6219.422384    1.183192

Hazard Rates:

sums<-data.frame(round(summary(fit7)$coef, 3))
sums$HR<-round(exp(sums[,1]), 3)
sums
##                        Estimate Std..Error t.value Pr...t..    HR
## as.factor(time_start)0   -2.034      0.153 -13.265    0.000 0.131
## as.factor(time_start)1   -1.997      0.158 -12.648    0.000 0.136
## black                     0.549      0.148   3.712    0.000 1.732
## college                  -0.541      0.110  -4.919    0.000 0.582
## Asian                     0.162      0.345   0.470    0.639 1.176
## white                     0.194      0.127   1.530    0.129 1.214
## hisp                      0.069      0.117   0.594    0.554 1.071
## male                     -0.385      0.078  -4.943    0.000 0.680
## rjob30                    0.497      0.161   3.090    0.003 1.644
## west                      0.110      0.130   0.848    0.398 1.116
## northeast                -0.076      0.108  -0.702    0.484 0.927
## south                     0.027      0.090   0.298    0.767 1.027
## black:college            -0.686      0.227  -3.026    0.003 0.504
desstat<-data.frame(nlsy97mig$black, nlsy97mig$college, nlsy97mig$Asian, nlsy97mig$white, nlsy97mig$hisp, nlsy97mig$male, nlsy97mig$rjob30, nlsy97mig$west, nlsy97mig$northeast, nlsy97mig$south) 
options(scipen = 100)
options(digits = 2)
stat.desc(desstat, basic = F)
##              nlsy97mig.black nlsy97mig.college nlsy97mig.Asian nlsy97mig.white
## median                 0.000            0.0000          0.0000          1.0000
## mean                   0.259            0.2913          0.0174          0.5986
## SE.mean                0.006            0.0063          0.0018          0.0067
## CI.mean.0.95           0.012            0.0124          0.0035          0.0132
## var                    0.192            0.2065          0.0171          0.2403
## std.dev                0.438            0.4544          0.1309          0.4902
## coef.var               1.690            1.5601          7.5077          0.8189
##              nlsy97mig.hisp nlsy97mig.male nlsy97mig.rjob30 nlsy97mig.west
## median               0.0000         1.0000           0.0000         0.0000
## mean                 0.2185         0.5133           0.0399         0.2311
## SE.mean              0.0056         0.0068           0.0028         0.0059
## CI.mean.0.95         0.0110         0.0134           0.0054         0.0115
## var                  0.1708         0.2499           0.0383         0.1777
## std.dev              0.4133         0.4999           0.1957         0.4216
## coef.var             1.8911         0.9739           4.9081         1.8242
##              nlsy97mig.northeast nlsy97mig.south
## median                    0.0000          0.0000
## mean                      0.1556          0.3938
## SE.mean                   0.0050          0.0068
## CI.mean.0.95              0.0099          0.0133
## var                       0.1314          0.2388
## std.dev                   0.3625          0.4886
## coef.var                  2.3302          1.2408
stat.desc(nlsy97mig$unem1, basic = F)
##       median         mean      SE.mean CI.mean.0.95          var      std.dev 
##       0.0000       0.1505       0.0049       0.0096       0.1279       0.3576 
##     coef.var 
##       2.3758
stat.desc(nlsy97mig$unem3, basic = F)
##       median         mean      SE.mean CI.mean.0.95          var      std.dev 
##       0.0000       0.1152       0.0044       0.0085       0.1020       0.3193 
##     coef.var 
##       2.7714
stat.desc(nlsy97mig$unem4, basic = F)
##       median         mean      SE.mean CI.mean.0.95          var      std.dev 
##       0.0000       0.1583       0.0050       0.0098       0.1333       0.3651 
##     coef.var 
##       2.3058