library(haven)
library(foreign)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(readr)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(alr3)
##
## Attaching package: 'alr3'
## The following object is masked from 'package:MASS':
##
## forbes
library(zoo)
library(nortest)
library(plotrix)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:plotrix':
##
## rescale
## The following object is masked from 'package:readr':
##
## col_factor
library(tableone)
library(Weighted.Desc.Stat)
library(mitools)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:lmtest':
##
## lrtest
## The following object is masked from 'package:car':
##
## logit
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(sandwich)
library(pastecs)
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(survey)
library(muhaz)
library(ggpubr)
library(survminer)
library(eha)
##
## Attaching package: 'eha'
## The following objects are masked from 'package:VGAM':
##
## dgompertz, dmakeham, pgompertz, pmakeham, qgompertz, qmakeham,
## rgompertz, rmakeham
library(reshape2)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:pastecs':
##
## first, last
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:pastecs':
##
## extract
nlsy97mig<-read.csv("C:\\Users\\Jaire\\OneDrive\\Desktop\\Research\\Data\\nlsy97mig.csv")
# Age at interview
nlsy97mig$age1<-ifelse(nlsy97mig$ragem06==-5, NA,nlsy97mig$ragem06/12)
nlsy97mig$age2<-ifelse(nlsy97mig$ragem07==-5, NA,nlsy97mig$ragem07/12)
nlsy97mig$age3<-ifelse(nlsy97mig$ragem08==-5, NA,nlsy97mig$ragem08/12)
nlsy97mig$age4<-ifelse(nlsy97mig$ragem09==-5, NA,nlsy97mig$ragem09/12)
# Migration since last interview
nlsy97mig$mig1<-ifelse(nlsy97mig$remp07==-4, NA,nlsy97mig$rmig07)
nlsy97mig$mig2<-ifelse(nlsy97mig$rmig08==-4, NA,nlsy97mig$rmig08)
nlsy97mig$mig3<-ifelse(nlsy97mig$rmig09==-4, NA,nlsy97mig$rmig09)
nlsy97mig$mig1<-ifelse(nlsy97mig$rmig07==-5, NA,nlsy97mig$rmig07)
nlsy97mig$mig2<-ifelse(nlsy97mig$rmig08==-5, NA,nlsy97mig$rmig08)
nlsy97mig$mig3<-ifelse(nlsy97mig$rmig09==-5, NA,nlsy97mig$rmig09)
#Unemployed at Start of Year
nlsy97mig$unem1<-ifelse(nlsy97mig$remp06==-4, NA,nlsy97mig$remp06)
nlsy97mig$unem2<-ifelse(nlsy97mig$remp07==-4, NA,nlsy97mig$remp07)
nlsy97mig$unem3<-ifelse(nlsy97mig$remp08==-4, NA,nlsy97mig$remp08)
nlsy97mig$unem4<-ifelse(nlsy97mig$remp09==-4, NA,nlsy97mig$remp09)
nlsy97mig$unem1<-recode(nlsy97mig$unem1, recodes = "0:3=0; 4:5=1; 6:201707=0")
nlsy97mig$unem2<-recode(nlsy97mig$unem2, recodes = "0:3=0; 4:5=1; 6:201707=0")
nlsy97mig$unem3<-recode(nlsy97mig$unem3, recodes = "0:3=0; 4:5=1; 6:201709=0")
nlsy97mig$unem4<-recode(nlsy97mig$unem4, recodes = "0:3=0; 4:5=1; 6:201709=0")
table(nlsy97mig$unem2)
##
## 0 1
## 6534 1840
# Sex
nlsy97mig$male<-recode(nlsy97mig$rsex, recodes = "1=1; 2=0")
# Race
nlsy97mig$nonwhite<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;2:5=1;1=0")
nlsy97mig$white<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;2:5=0")
nlsy97mig$black<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1=0;2=1;3:5=0")
nlsy97mig$Asian<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:3=0;4=1;5=0")
nlsy97mig$AmerIn<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:2=0;3=1;4:5=0")
nlsy97mig$other<-recode(nlsy97mig$rrace, recodes = "-2=NA;-1=NA;1:4=0;5=1")
# Hispanic
nlsy97mig$hisp<-recode(nlsy97mig$reth, recodes = "1=0; 2=1; 3:4=0")
# Highest Level of Education 4-year degree or higher
nlsy97mig$none<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1=1;2:8=0")
nlsy97mig$GED<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1=0;2=1;3:8=1")
nlsy97mig$HSdip<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:2=0;3=1;4:8=1")
nlsy97mig$assoc<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:3=0;4=1;5:8=1")
nlsy97mig$bach<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:4=0;5=1;6:8=1")
nlsy97mig$mastr<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:5=0;6=1;7:8=1")
nlsy97mig$Doct<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:6=0;7:8=1")
nlsy97mig$college<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;-1=NA;1:3=0;4:8=1")
nlsy97mig$nocollege<-recode(nlsy97mig$reduc06, recodes = "-5=NA;-2=NA;1:3=1;4:8=0")
# Immigration Status
nlsy97mig$natbrncit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1=1;2:6=0")
nlsy97mig$naturcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1=0;2=1;3:6=0")
nlsy97mig$appnaturcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:2=0;3=1;4:6=0")
nlsy97mig$permrescit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:3=0;4=1;5:6=0")
nlsy97mig$apprescit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:4=0;5=1;6=0")
nlsy97mig$othrcit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:5=0;6=1")
nlsy97mig$noncit<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;1:2=0;3:6=1")
nlsy97mig$immigrant<-recode(nlsy97mig$rcit06, recodes = "-5=NA;-3=NA;6=NA;1=0;2:5=1")
#interaction college race
nlsy97mig$nonwhtcol<-c(nlsy97mig$nonwhite*nlsy97mig$college)
#occupation
nlsy97mig$rjob1<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA; 10:430=1;500:9990=0")
nlsy97mig$rjob2<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;500 : 950=1;10:9990=0 ")
nlsy97mig$rjob3<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1000 : 1240=1;10:9990=0")
nlsy97mig$rjob4<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1300 : 1530=1;10:9990=0")
nlsy97mig$rjob5<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1540 : 1560=1;10:9990=0")
nlsy97mig$rjob6<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1600 : 1760=1;10:9990=0")
nlsy97mig$rjob7<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1800 : 1860=1;10:9990=0")
nlsy97mig$rjob8<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;1900 : 1960=1;10:9990=0")
nlsy97mig$rjob9<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2000 : 2060=1;10:9990=0")
nlsy97mig$rjob10<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2100 : 2150=1;10:9990=0")
nlsy97mig$rjob11<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2200 : 2340=1;10:9990=0")
nlsy97mig$rjob12<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2400 : 2550=1;10:9990=0")
nlsy97mig$rjob13<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2600 : 2760=1;10:9990=0")
nlsy97mig$rjob14<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;2800 : 2960=1;10:9990=0")
nlsy97mig$rjob15<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3000 : 3260=1;10:9990=0")
nlsy97mig$rjob16<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3300 : 3650=1;10:9990=0")
nlsy97mig$rjob17<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;3700 : 3950=1;10:9990=0")
nlsy97mig$rjob18<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4000 : 4160=1;10:9990=0")
nlsy97mig$rjob19<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4200 : 4250=1;10:9990=0")
nlsy97mig$rjob20<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4300 : 4430=1;10:9990=0")
nlsy97mig$rjob21<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4460=1;10:9990=0")
nlsy97mig$rjob22<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4500 : 4650=1;10:9990=0")
nlsy97mig$rjob23<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;4700 : 4960=1;10:9990=0")
nlsy97mig$rjob24<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;5000 : 5930=1;10:9990=0")
nlsy97mig$rjob25<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;6000 : 6130=1;10:9990=0")
nlsy97mig$rjob26<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;6200 : 6940=1;10:9990=0")
nlsy97mig$rjob27<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7000 : 7620=1;10:9990=0")
nlsy97mig$rjob28<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7700 : 7750=1;10:9990=0")
nlsy97mig$rjob29<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7800 : 7850=1;10:9990=0")
nlsy97mig$rjob30<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;7900 : 8960=1;10:9990=0")
nlsy97mig$rjob31<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9000 : 9750=1;10:9990=0")
nlsy97mig$rjob32<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9800 : 9840=1;10:9990=0")
nlsy97mig$rjob33<-recode(nlsy97mig$rjob06, recodes = "-5=NA;-3=NA;-4=NA;9950 : 9990=1;10:9840=0")
#region
nlsy97mig$northeast<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 2:4=0")
nlsy97mig$northcent<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 2=1;1=0;3:4=0")
nlsy97mig$south<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA;1:2=0;3=1;4=0")
nlsy97mig$west<-recode(nlsy97mig$rregn06, recodes = "-5=NA;-4=NA; -3=NA; 1:3=0;4=1")
#infrastructure
nlsy97mig$urban<-recode(nlsy97mig$rurbru06, recodes = "-5=NA;-4=NA;-3=NA;2=NA;")
nlsy97mig$rural<-recode(nlsy97mig$rurbru06, recodes = "-5=NA;-4=NA;-3=NA;2=NA;1=0;0=1")
table(nlsy97mig$south)
##
## 0 1
## 4513 3002
1 10 TO 430 EXECUTIVE, ADMINISTRATIVE AND MANAGERIAL 2 500 : 950: MANAGEMENT RELATED 3 1000 : 1240: MATHEMATICAL AND COMPUTER SCIENTISTS 4 1300 : 1530: ENGINEERS, ARCHITECTS, AND SURVEYORS 5 1540 : 1560: ENGINEERING AND RELATED TECHNICIANS 6 1600 : 1760: PHYSICAL SCIENTISTS 7 1800 : 1860: SOCIAL SCIENTISTS AND RELATED WORKERS 8 1900 : 1960: LIFE, PHYSICAL, AND SOCIAL SCIENCE TECHNICIANS 9 2000 : 2060: COUNSELORS, SOCIAL, AND RELIGIOUS WORKERS 10 2100 : 2150: LAWYERS, JUDGES, AND LEGAL SUPPORT WORKERS 11 2200 : 2340: TEACHERS 12 2400 : 2550: EDUCATION, TRAINING, AND LIBRARY WORKERS 13 2600 : 2760: ENTERTAINERS AND PERFORMERS, SPORTS AND RELATED WORKERS 14 2800 : 2960: MEDIA AND COMMUNICATION WORKERS 15 3000 : 3260: HEALTH DIAGNOSIS AND TREATING PRACTITIONERS 16 3300 : 3650: HEALTH CARE TECHNICAL AND SUPPORT 17 3700 : 3950: PROTECTIVE SERVICE 18 4000 : 4160: FOOD PREPARATIONS AND SERVING RELATED 19 4200 : 4250: CLEANING AND BUILDING SERVICE 20 4300 : 4430: ENTERTAINMENT ATTENDANTS AND RELATED WORKERS 21 4460 FUNERAL RELATED OCCUPATIONS 22 4500 : 4650: PERSONAL CARE AND SERVICE WORKERS 23 4700 : 4960: SALES AND RELATED WORKERS 24 5000 : 5930: OFFICE AND ADMINISTRATIVE SUPPORT WORKERS 25 6000 : 6130: FARMING, FISHING, AND FORESTRY 26 6200 : 6940: CONSTRUCTION TRADES AND EXTRACTION WORKERS 27 7000 : 7620: INSTALLATION, MAINTENANCE, AND REPAIR WORKERS 28 7700 : 7750: PRODUCTION AND OPERATING WORKERS 29 7800 : 7850: FOOD PREPARATION 30 7900 : 8960: SETTER, OPERATORS, AND TENDERS 31 9000 : 9750: TRANSPORTATION AND MATERIAL MOVING WORKERS 32 9800 : 9840: MILITARY SPECIFIC OCCUPATIONS 33 9950 : 9990: ACS SPECIAL CODES
#subset censoring
nlsy97mig<-subset(nlsy97mig, is.na(unem2)==F&is.na(unem3)==F&
is.na(unem4)==F&is.na(age2)==F&
is.na(age3)==F&is.na(age4)==F&unem2!=1)
# Reshape
e.long<-reshape(data.frame(nlsy97mig), idvar = "rid",
varying = list(c("age2", "age3"),
c("age3", "age4")),
v.names = c("age_enter", "age_exit"),
times = 1:2, direction = "long")
e.long<-e.long[order(e.long$rid, e.long$time),]
e.long$unemtran<-NA
e.long$unemtran[e.long$unem2==0&e.long$unem3==1&e.long$time==1]<-1
e.long$unemtran[e.long$unem3==0&e.long$unem4==1&e.long$time==2]<-1
e.long$unemtran[e.long$unem2==0&e.long$unem3==0&e.long$time==1]<-0
e.long$unemtran[e.long$unem3==0&e.long$unem4==0&e.long$time==2]<-0
#Censor failures
failed1<-which(is.na(e.long$unemtran)==T)
e.long<-e.long[-failed1,]
e.long$age1r<-round(e.long$age_enter,0)
e.long$age4r<-round(e.long$age_exit,0)
e.long$time_start<-e.long$time-1
head(e.long, n=10)
## rid wht psu strat rsex reth rrace rbirthyr rregn06 rurbru06 rjob06
## 1.1 1 323197 1 35 2 4 1 1981 1 1 4760
## 1.2 1 323197 1 35 2 4 1 1981 1 1 4760
## 4.1 4 149099 2 35 2 2 5 1981 1 1 5240
## 4.2 4 149099 2 35 2 2 5 1981 1 1 5240
## 5.1 5 245150 1 35 1 2 1 1982 1 1 3740
## 5.2 5 245150 1 35 1 2 1 1982 1 1 3740
## 6.1 6 218371 1 35 2 2 5 1982 1 1 2540
## 6.2 6 218371 1 35 2 2 5 1982 1 1 2540
## 9.1 9 313027 1 35 1 4 1 1982 1 1 3920
## 9.2 9 313027 1 35 1 4 1 1982 1 1 3920
## rcit06 reduc06 rmig07 rmig08 rmig09 ragem06 ragem07 ragem08 ragem09 remp06
## 1.1 2 5 -4 -4 -4 302 313 325 337 200103
## 1.2 2 5 -4 -4 -4 302 313 325 337 200103
## 4.1 1 3 1 -4 1 309 320 336 344 200502
## 4.2 1 3 1 -4 1 309 320 336 344 200502
## 5.1 1 3 -4 -4 0 289 300 312 323 200601
## 5.2 1 3 -4 -4 0 289 300 312 323 200601
## 6.1 1 2 -4 -4 -4 302 310 323 334 200402
## 6.2 1 2 -4 -4 -4 302 310 323 334 200402
## 9.1 1 5 -4 -4 -4 289 301 314 325 5
## 9.2 1 5 -4 -4 -4 289 301 314 325 5
## remp07 remp08 remp09 age1 mig1 mig2 mig3 unem1 unem2 unem3 unem4 male
## 1.1 200702 200702 200702 25.16667 -4 -4 -4 0 0 0 0 0
## 1.2 200702 200702 200702 25.16667 -4 -4 -4 0 0 0 0 0
## 4.1 200502 200502 200802 25.75000 1 -4 1 0 0 0 0 0
## 4.2 200502 200502 200802 25.75000 1 -4 1 0 0 0 0 0
## 5.1 200602 200602 200602 24.08333 -4 -4 0 0 0 0 0 1
## 5.2 200602 200602 200602 24.08333 -4 -4 0 0 0 0 0 1
## 6.1 200402 200402 200402 25.16667 -4 -4 -4 0 0 0 0 0
## 6.2 200402 200402 200402 25.16667 -4 -4 -4 0 0 0 0 0
## 9.1 200601 200601 200601 24.08333 -4 -4 -4 1 0 0 0 1
## 9.2 200601 200601 200601 24.08333 -4 -4 -4 1 0 0 0 1
## nonwhite white black Asian AmerIn other hisp none GED HSdip assoc bach
## 1.1 0 1 0 0 0 0 0 0 1 1 1 1
## 1.2 0 1 0 0 0 0 0 0 1 1 1 1
## 4.1 1 0 0 0 0 1 1 0 1 1 0 0
## 4.2 1 0 0 0 0 1 1 0 1 1 0 0
## 5.1 0 1 0 0 0 0 1 0 1 1 0 0
## 5.2 0 1 0 0 0 0 1 0 1 1 0 0
## 6.1 1 0 0 0 0 1 1 0 1 0 0 0
## 6.2 1 0 0 0 0 1 1 0 1 0 0 0
## 9.1 0 1 0 0 0 0 0 0 1 1 1 1
## 9.2 0 1 0 0 0 0 0 0 1 1 1 1
## mastr Doct college nocollege natbrncit naturcit appnaturcit permrescit
## 1.1 0 0 1 0 0 1 0 0
## 1.2 0 0 1 0 0 1 0 0
## 4.1 0 0 0 1 1 0 0 0
## 4.2 0 0 0 1 1 0 0 0
## 5.1 0 0 0 1 1 0 0 0
## 5.2 0 0 0 1 1 0 0 0
## 6.1 0 0 0 1 1 0 0 0
## 6.2 0 0 0 1 1 0 0 0
## 9.1 0 0 1 0 1 0 0 0
## 9.2 0 0 1 0 1 0 0 0
## apprescit othrcit noncit immigrant nonwhtcol rjob1 rjob2 rjob3 rjob4 rjob5
## 1.1 0 0 0 1 0 0 0 0 0 0
## 1.2 0 0 0 1 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0 0 0 0
## 4.2 0 0 0 0 0 0 0 0 0 0
## 5.1 0 0 0 0 0 0 0 0 0 0
## 5.2 0 0 0 0 0 0 0 0 0 0
## 6.1 0 0 0 0 0 0 0 0 0 0
## 6.2 0 0 0 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0 0 0 0
## 9.2 0 0 0 0 0 0 0 0 0 0
## rjob6 rjob7 rjob8 rjob9 rjob10 rjob11 rjob12 rjob13 rjob14 rjob15 rjob16
## 1.1 0 0 0 0 0 0 0 0 0 0 0
## 1.2 0 0 0 0 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0 0 0 0 0
## 4.2 0 0 0 0 0 0 0 0 0 0 0
## 5.1 0 0 0 0 0 0 0 0 0 0 0
## 5.2 0 0 0 0 0 0 0 0 0 0 0
## 6.1 0 0 0 0 0 0 1 0 0 0 0
## 6.2 0 0 0 0 0 0 1 0 0 0 0
## 9.1 0 0 0 0 0 0 0 0 0 0 0
## 9.2 0 0 0 0 0 0 0 0 0 0 0
## rjob17 rjob18 rjob19 rjob20 rjob21 rjob22 rjob23 rjob24 rjob25 rjob26
## 1.1 0 0 0 0 0 0 1 0 0 0
## 1.2 0 0 0 0 0 0 1 0 0 0
## 4.1 0 0 0 0 0 0 0 1 0 0
## 4.2 0 0 0 0 0 0 0 1 0 0
## 5.1 1 0 0 0 0 0 0 0 0 0
## 5.2 1 0 0 0 0 0 0 0 0 0
## 6.1 0 0 0 0 0 0 0 0 0 0
## 6.2 0 0 0 0 0 0 0 0 0 0
## 9.1 1 0 0 0 0 0 0 0 0 0
## 9.2 1 0 0 0 0 0 0 0 0 0
## rjob27 rjob28 rjob29 rjob30 rjob31 rjob32 rjob33 northeast northcent south
## 1.1 0 0 0 0 0 0 0 1 0 0
## 1.2 0 0 0 0 0 0 0 1 0 0
## 4.1 0 0 0 0 0 0 0 1 0 0
## 4.2 0 0 0 0 0 0 0 1 0 0
## 5.1 0 0 0 0 0 0 0 1 0 0
## 5.2 0 0 0 0 0 0 0 1 0 0
## 6.1 0 0 0 0 0 0 0 1 0 0
## 6.2 0 0 0 0 0 0 0 1 0 0
## 9.1 0 0 0 0 0 0 0 1 0 0
## 9.2 0 0 0 0 0 0 0 1 0 0
## west urban rural time age_enter age_exit unemtran age1r age4r time_start
## 1.1 0 1 0 1 26.08333 27.08333 0 26 27 0
## 1.2 0 1 0 2 27.08333 28.08333 0 27 28 1
## 4.1 0 1 0 1 26.66667 28.00000 0 27 28 0
## 4.2 0 1 0 2 28.00000 28.66667 0 28 29 1
## 5.1 0 1 0 1 25.00000 26.00000 0 25 26 0
## 5.2 0 1 0 2 26.00000 26.91667 0 26 27 1
## 6.1 0 1 0 1 25.83333 26.91667 0 26 27 0
## 6.2 0 1 0 2 26.91667 27.83333 0 27 28 1
## 9.1 0 1 0 1 25.08333 26.16667 0 25 26 0
## 9.2 0 1 0 2 26.16667 27.08333 0 26 27 1
e.long%>%
group_by(time)%>%
summarise(prop_event= mean(unemtran, na.rm=T))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## time prop_event
## <int> <dbl>
## 1 1 0.115
## 2 2 0.117
e.long%>%
filter(complete.cases(time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west))%>%
group_by(time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west)%>%
summarise(prop_event= mean(unemtran, na.rm=T))
## `summarise()` regrouping output by 'time', 'white', 'hisp', 'Asian', 'AmerIn', 'other', 'nonwhite', 'nocollege', 'rjob30', 'rural', 'black', 'northeast', 'northcent', 'south' (override with `.groups` argument)
## # A tibble: 280 x 16
## # Groups: time, white, hisp, Asian, AmerIn, other, nonwhite, nocollege,
## # rjob30, rural, black, northeast, northcent, south [280]
## time white hisp Asian AmerIn other nonwhite nocollege rjob30 rural black
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 0 0 0 1 0 0 0 1
## 2 1 0 0 0 0 0 1 0 0 0 1
## 3 1 0 0 0 0 0 1 0 0 0 1
## 4 1 0 0 0 0 0 1 0 0 0 1
## 5 1 0 0 0 0 0 1 0 0 1 1
## 6 1 0 0 0 0 0 1 0 0 1 1
## 7 1 0 0 0 0 0 1 0 1 0 1
## 8 1 0 0 0 0 0 1 0 1 0 1
## 9 1 0 0 0 0 0 1 1 0 0 1
## 10 1 0 0 0 0 0 1 1 0 0 1
## # ... with 270 more rows, and 5 more variables: northeast <dbl>,
## # northcent <dbl>, south <dbl>, west <dbl>, prop_event <dbl>
options("survey.adjust.domain.lonely"=T)
options("survey.lonely.psu"='adjust')
options(survey.lonely.psu = "adjust")
e.long<-e.long%>%
filter(complete.cases(psu,time,white,hisp, Asian, AmerIn,other, nonwhite, nocollege,rjob30,rural,black, northeast, northcent, south, west))
des<-svydesign(nest = TRUE,ids =~psu,
strata =~strat, weights =~wht,
data = e.long)
svymean(~strat, design=des)
## mean SE
## strat 39.413 0.6537
# Intercept
fit1<-svyglm(unemtran~as.factor(time_start)-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Black race
fit2<-svyglm(unemtran~as.factor(time_start)+black-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Other races
fit3<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# highest level of education
fit4<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# High/low risk occupations
fit5<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college+rjob30-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
# Race education interaction
fit6<-svyglm(unemtran~as.factor(time_start)+black+Asian+white+hisp+male+college+rjob30+west+northeast+south-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fit7<-svyglm(unemtran~as.factor(time_start)+black*college+Asian+white+hisp+male+rjob30+west+ northeast+south-1,
design=des, family=binomial(link="logit"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit1)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) - 1, design = des,
## family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -2.09427 0.05391 -38.85 <2e-16 ***
## as.factor(time_start)1 -2.07461 0.04515 -45.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.00011)
##
## Number of Fisher Scoring iterations: 4
summary(fit2)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black - 1,
## design = des, family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -2.15563 0.05592 -38.550 < 2e-16 ***
## as.factor(time_start)1 -2.13427 0.04828 -44.203 < 2e-16 ***
## black 0.35393 0.07923 4.467 1.88e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1.000142)
##
## Number of Fisher Scoring iterations: 4
summary(fit3)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian +
## white + hisp + male - 1, design = des, family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -2.19361 0.13513 -16.233 < 2e-16 ***
## as.factor(time_start)1 -2.16830 0.13548 -16.004 < 2e-16 ***
## black 0.52054 0.14716 3.537 0.000593 ***
## Asian 0.07289 0.33417 0.218 0.827733
## white 0.17913 0.12717 1.409 0.161779
## hisp 0.17385 0.11299 1.539 0.126752
## male -0.30020 0.07492 -4.007 0.000112 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9994058)
##
## Number of Fisher Scoring iterations: 4
summary(fit4)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian +
## white + hisp + male + college - 1, design = des, family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -1.95192 0.13549 -14.407 < 2e-16 ***
## as.factor(time_start)1 -1.91834 0.13843 -13.858 < 2e-16 ***
## black 0.42905 0.14272 3.006 0.00328 **
## Asian 0.18541 0.34936 0.531 0.59669
## white 0.18035 0.12303 1.466 0.14555
## hisp 0.06944 0.11505 0.604 0.54736
## male -0.36975 0.07805 -4.737 6.57e-06 ***
## college -0.64950 0.09880 -6.574 1.75e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9993826)
##
## Number of Fisher Scoring iterations: 5
summary(fit5)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian +
## white + hisp + male + college + rjob30 - 1, design = des,
## family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -1.97966 0.13689 -14.462 < 2e-16 ***
## as.factor(time_start)1 -1.94434 0.13916 -13.972 < 2e-16 ***
## black 0.43422 0.14320 3.032 0.00304 **
## Asian 0.19404 0.34794 0.558 0.57822
## white 0.18184 0.12305 1.478 0.14237
## hisp 0.07889 0.11579 0.681 0.49712
## male -0.38351 0.07836 -4.894 3.47e-06 ***
## college -0.62388 0.09810 -6.360 4.98e-09 ***
## rjob30 0.48497 0.16099 3.012 0.00323 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9998205)
##
## Number of Fisher Scoring iterations: 5
summary(fit6)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black + Asian +
## white + hisp + male + college + rjob30 + west + northeast +
## south - 1, design = des, family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -2.00884 0.15353 -13.085 < 2e-16 ***
## as.factor(time_start)1 -1.97340 0.15745 -12.534 < 2e-16 ***
## black 0.45163 0.14833 3.045 0.00294 **
## Asian 0.17518 0.34688 0.505 0.61461
## white 0.19387 0.12620 1.536 0.12749
## hisp 0.05849 0.11749 0.498 0.61965
## male -0.38277 0.07827 -4.890 3.64e-06 ***
## college -0.62288 0.09885 -6.301 7.05e-09 ***
## rjob30 0.49058 0.16131 3.041 0.00298 **
## west 0.10851 0.13043 0.832 0.40734
## northeast -0.07642 0.10817 -0.706 0.48145
## south 0.02173 0.09057 0.240 0.81084
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9990917)
##
## Number of Fisher Scoring iterations: 5
summary(fit7)
##
## Call:
## svyglm(formula = unemtran ~ as.factor(time_start) + black * college +
## Asian + white + hisp + male + rjob30 + west + northeast +
## south - 1, design = des, family = binomial(link = "logit"))
##
## Survey design:
## svydesign(nest = TRUE, ids = ~psu, strata = ~strat, weights = ~wht,
## data = e.long)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## as.factor(time_start)0 -2.03357 0.15330 -13.265 < 2e-16 ***
## as.factor(time_start)1 -1.99710 0.15790 -12.648 < 2e-16 ***
## black 0.54921 0.14794 3.712 0.000332 ***
## college -0.54111 0.10999 -4.919 3.26e-06 ***
## Asian 0.16216 0.34503 0.470 0.639346
## white 0.19371 0.12657 1.530 0.128937
## hisp 0.06940 0.11686 0.594 0.553896
## male -0.38496 0.07788 -4.943 2.96e-06 ***
## rjob30 0.49703 0.16085 3.090 0.002567 **
## west 0.11036 0.13016 0.848 0.398469
## northeast -0.07584 0.10806 -0.702 0.484368
## south 0.02680 0.09005 0.298 0.766589
## black:college -0.68555 0.22653 -3.026 0.003120 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9973726)
##
## Number of Fisher Scoring iterations: 5
AIC1<-AIC(fit1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(fit2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3<-AIC(fit3)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC4<-AIC(fit4)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC5<-AIC(fit5)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC6<-AIC(fit6)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC7<-AIC(fit7)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
print(AIC1)
## eff.p AIC deltabar
## 2.22834 6322.01749 1.11417
print(AIC2)
## eff.p AIC deltabar
## 3.105504 6307.465890 1.035168
print(AIC3)
## eff.p AIC deltabar
## 8.112278 6295.210395 1.158897
print(AIC4)
## eff.p AIC deltabar
## 9.812926 6228.181162 1.226616
print(AIC5)
## eff.p AIC deltabar
## 11.025625 6220.680351 1.225069
print(AIC6)
## eff.p AIC deltabar
## 14.74524 6225.34180 1.22877
print(AIC7)
## eff.p AIC deltabar
## 15.381496 6219.422384 1.183192
sums<-data.frame(round(summary(fit7)$coef, 3))
sums$HR<-round(exp(sums[,1]), 3)
sums
## Estimate Std..Error t.value Pr...t.. HR
## as.factor(time_start)0 -2.034 0.153 -13.265 0.000 0.131
## as.factor(time_start)1 -1.997 0.158 -12.648 0.000 0.136
## black 0.549 0.148 3.712 0.000 1.732
## college -0.541 0.110 -4.919 0.000 0.582
## Asian 0.162 0.345 0.470 0.639 1.176
## white 0.194 0.127 1.530 0.129 1.214
## hisp 0.069 0.117 0.594 0.554 1.071
## male -0.385 0.078 -4.943 0.000 0.680
## rjob30 0.497 0.161 3.090 0.003 1.644
## west 0.110 0.130 0.848 0.398 1.116
## northeast -0.076 0.108 -0.702 0.484 0.927
## south 0.027 0.090 0.298 0.767 1.027
## black:college -0.686 0.227 -3.026 0.003 0.504
desstat<-data.frame(nlsy97mig$black, nlsy97mig$college, nlsy97mig$Asian, nlsy97mig$white, nlsy97mig$hisp, nlsy97mig$male, nlsy97mig$rjob30, nlsy97mig$west, nlsy97mig$northeast, nlsy97mig$south)
options(scipen = 100)
options(digits = 2)
stat.desc(desstat, basic = F)
## nlsy97mig.black nlsy97mig.college nlsy97mig.Asian nlsy97mig.white
## median 0.000 0.0000 0.0000 1.0000
## mean 0.259 0.2913 0.0174 0.5986
## SE.mean 0.006 0.0063 0.0018 0.0067
## CI.mean.0.95 0.012 0.0124 0.0035 0.0132
## var 0.192 0.2065 0.0171 0.2403
## std.dev 0.438 0.4544 0.1309 0.4902
## coef.var 1.690 1.5601 7.5077 0.8189
## nlsy97mig.hisp nlsy97mig.male nlsy97mig.rjob30 nlsy97mig.west
## median 0.0000 1.0000 0.0000 0.0000
## mean 0.2185 0.5133 0.0399 0.2311
## SE.mean 0.0056 0.0068 0.0028 0.0059
## CI.mean.0.95 0.0110 0.0134 0.0054 0.0115
## var 0.1708 0.2499 0.0383 0.1777
## std.dev 0.4133 0.4999 0.1957 0.4216
## coef.var 1.8911 0.9739 4.9081 1.8242
## nlsy97mig.northeast nlsy97mig.south
## median 0.0000 0.0000
## mean 0.1556 0.3938
## SE.mean 0.0050 0.0068
## CI.mean.0.95 0.0099 0.0133
## var 0.1314 0.2388
## std.dev 0.3625 0.4886
## coef.var 2.3302 1.2408
stat.desc(nlsy97mig$unem1, basic = F)
## median mean SE.mean CI.mean.0.95 var std.dev
## 0.0000 0.1505 0.0049 0.0096 0.1279 0.3576
## coef.var
## 2.3758
stat.desc(nlsy97mig$unem3, basic = F)
## median mean SE.mean CI.mean.0.95 var std.dev
## 0.0000 0.1152 0.0044 0.0085 0.1020 0.3193
## coef.var
## 2.7714
stat.desc(nlsy97mig$unem4, basic = F)
## median mean SE.mean CI.mean.0.95 var std.dev
## 0.0000 0.1583 0.0050 0.0098 0.1333 0.3651
## coef.var
## 2.3058