Requirements
library(lattice)
library(ggplot2)
library(car)
library(stargazer, quietly = T)
library(plm)
library(sampleSelection)
library(sandwich)
library(lmtest)
library(readr)
Read in raw data
deg.creds_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_edu.csv")
health_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_health.csv")
spouse_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_spouse.csv")
marital_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_marital.csv")
indust_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_indust.csv")
tenure_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_tenure.csv")
occu_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_occu.csv")
ex_occ_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_ex_occu.csv")
occudesire_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_occdesire.csv")
hrs_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tageset_hrs.csv")
wage_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tageset_hrwage.csv")
income_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_income.csv")
age_dat <- read_csv("https://raw.githubusercontent.com/cjmarraro/wageGapData/master/data/csv_dir/tagset_age.csv")
preprocess data
age_dat <- age_dat[,c("R0000100", "T3966200")]
income_dat = income_dat[,c("R0214700", "R0214800", "R0328000",
"R0482600", "R0782101","R1024001",
"R1410701", "R1778501", "R2141601",
"R2350301","R2722501", "R2971401",
"R3279401","R3559001","R3897101",
"R4295101", "R5626201", "R6364601",
"R6909701" ,"R7607800", "R8316300",
"T0912400", "T3045300" ,"T3977400")]
wage_dat = wage_dat[,c("R0047010", "R0263710", "R0446810",
"R0702510" ,"R0945610", "R1256010",
"R1650810", "R1923410","R2318210", "R2526010",
"R2925010" ,"R3127800" ,"R3523500", "R3728500",
"R4416800", "R5079800" ,"R5165200" ,"R6478000",
"R7005700" ,"R7702900","R8495200" ,"T0986800")]
hrs_dat = hrs_dat[,c("R0070500", "R0337800", "R0545600", "R0839900",
"R1087300","R1463000" ,"R1809800" ,"R2171500",
"R2376300", "R2770900", "R3012700", "R3340100",
"R3604400", "R3954600", "R4582900", "R5267100",
"R5912600", "R6578200", "R7192900" ,"R7879600",
"T0121900" )]
hrs_dat$HRS.NA <- NA
occu_dat = occu_dat[,c("R0263400", "R0828000", "R0945300", "R1255700",
"R1650500","R1923100", "R2317900", "R2525700",
"R2924700", "R3127400", "R3523100" ,"R3728100",
"R4193700", "R4587901", "R5270900","R6473700",
"R6592900", "R7209600", "R7898000", "T0138400")]
occu_dat$OCC.NA <- NA
occu_dat$OCC.NA1 <- NA
exp_occu_dat = ex_occ_dat[,-1]
occ_desr = occudesire_dat[,-1]
indust_dat = indust_dat[,c("R0046300", "R0263300", "R0446300",
"R0702000", "R0944900","R1255300",
"R1650100", "R1922700","R2317500" ,
"R2525300", "R2924300", "R3127000",
"R3522700", "R3727700", "R4182000",
"R4587901", "R5270500", "R6472100",
"R6591300" ,"R7209100", "R7897500",
"T0137900")]
tenure_dat = tenure_dat[,c("R0068710", "R0333221", "R0539410",
"R0833810", "R1081010","R1456710" ,
"R1803510", "R2165110", "R2372510",
"R2763410", "R3005210", "R3332610",
"R3597610", "R3947800", "R4416300",
"R5079300", "R5164600" ,"R6477500",
"R7005200", "R7702400","R8494700",
"T0986300")]
marital_dat = marital_dat[,c("R0217501", "R0405601", "R0618601",
"R0898401","R1144901", "R1520101",
"R1890801", "R2257901", "R2445301",
"R2871000","R3074700", "R3401400",
"R3656800", "R4007300", "R4418400",
"R5081400", "R5166700", "R6479300",
"R7007000", "R7704300","R8496700",
"T0988500")]
spouse_dat = spouse_dat[,c("R0155500", "R0312710", "R0482910",
"R0784300", "R1024001","R1410701",
"R1780701", "R2143801", "R2352501",
"R2724701", "R2973600", "R3281600",
"R3571801", "R3899300", "R4314401",
"R4996001", "R5650801", "R6374901",
"R6917801", "R8325800", "T0920800" )]
spouse_dat$SPO.NA <-NA
health_dat = health_dat[,-1]
deg.creds_dat = deg.creds_dat[,-1]
generate column names for time-series
mylist = list()
iterColNames <- function(mylist){
appendYear = c(as.character(79:94),'96','98','00',
'02','04','06')
myColNames = list()
for(i in mylist){
myColNames[[i]] = paste(i, appendYear, sep = ".")
myColNames = as.list(myColNames)
myColNames = myColNames[myColNames= as.character(mylist)]
}
return(myColNames)
}
Calculate the variables tenure and “total experience” in 2006
calc_experience <- (rowSums(tenure_dat))
tot_exp06 <-as.data.frame(calc_experience)
tenure <- as.data.frame(rowMeans(tenure_dat))
dat_all <- data.frame(age_dat, deg.creds_dat, exp_occu_dat,
tot_exp06, occ_desr, income_dat, occu_dat,
indust_dat, tenure_dat, marital_dat, spouse_dat,
wage_dat, hrs_dat, health_dat, tenure)
dat_all[dat_all == -1] = NA # Refused
dat_all[dat_all == -2] = NA # Dont know
dat_all[dat_all == -3] = NA # Invalid missing
dat_all[dat_all == -4] = NA # Valid missing
dat_all[dat_all == -5] = NA # Non-interview
myColNames <-iterColNames(list("INC", "OCC", "IND",
"TEN", "MAR", "SPO",
"WAG", "HRS"))
colnames(dat_all) <- c("CaseID","Age", "CREDITS", "EXPT.OCC",
"TOT.EXP06", "DES.OCC", "Race", "Sex",
as.character(as.list(unlist(myColNames))),
"PHYS", "WGHT", "tenure")
Subset individuals with bachelors degree or higher
dat_ed = (dat_all$CREDITS > 100)
dat_use <-subset(dat_all, dat_ed)
compare percentage change of income in 2006 from previous years income
years_stagger <- dat_use[,c(9,12,15,20)]
year06 <- dat_use[,30]
wage <- as.matrix((log(year06/years_stagger))/15 )
wage[is.infinite(as.numeric(wage))] <- NA
summary(wage)
## INC.79 INC.82 INC.85 INC.90
## Min. :0.1030 Min. :-0.1023 Min. :-0.1284 Min. :-0.24795
## 1st Qu.:0.2679 1st Qu.: 0.1821 1st Qu.: 0.1315 1st Qu.: 0.04621
## Median :0.3447 Median : 0.2396 Median : 0.1832 Median : 0.07079
## Mean :0.3427 Mean : 0.2361 Mean : 0.1827 Mean : 0.07182
## 3rd Qu.:0.4027 3rd Qu.: 0.2907 3rd Qu.: 0.2364 3rd Qu.: 0.09714
## Max. :0.6454 Max. : 0.5593 Max. : 0.4131 Max. : 0.39387
## NA's :367 NA's :242 NA's :223 NA's :221
transform to panel data
years <- c(1979:1994,1996,1998,2000,2002,2004,2006)
dat_reshape <-reshape(dat_use, direction = "long",
varying = list(myColNames$INC, myColNames$OCC,
myColNames$IND,myColNames$TEN, myColNames$MAR,
myColNames$SPO, myColNames$WAG, myColNames$HRS),
v.names=c("INC", "OCC", "IND", "TEN", "MAR",
"SPO", "WAG", "HRS"), idvar="CaseID",
times=rep(years))
Additional features
dat_panel = dat_reshape
dat_hrs = (dat_panel[, "HRS"] >= 35) # full-time workers only
data_NLSY <- subset(dat_panel, dat_hrs)
attach(data_NLSY)
# occupation dummies
mngmt = as.numeric((OCC >= 1) & (OCC <= 43))
professional = as.numeric((OCC >= 50) & (OCC <= 95))
hard_science = as.numeric((OCC >= 100) & (OCC <= 156))
technical <- as.numeric((OCC >= 203) & (OCC <= 235))
sales <- as.numeric((OCC >= 243) & (OCC <= 285))
clerical <- as.numeric((OCC >= 303) & (OCC <= 395))
service <- as.numeric((OCC >= 403) & (OCC <= 469))
operatives <- as.numeric((OCC >= 473) & (OCC <= 889))
occ_indx <- as.factor(0 * mngmt + 1 * professional + 2 * hard_science + 3 *
technical + 4 * sales + 5 * clerical + 6 * service + 7 * operatives)
levels(occ_indx)[1] <- "mngmt"
levels(occ_indx)[2] <- "prof"
levels(occ_indx)[3] <- "hard science"
levels(occ_indx)[4] <- "technical"
levels(occ_indx)[5] <- "sales"
levels(occ_indx)[6] <- "clerical"
levels(occ_indx)[7] <- "services"
levels(occ_indx)[8] <- "operatives"
occ_indx = relevel(occ_indx, ref = "mngmt")
levels(occ_indx)
## [1] "mngmt" "prof" "hard science" "technical"
## [5] "sales" "clerical" "services" "operatives"
female <- as.numeric(Sex == 2)
hispanic <- as.numeric(Race == 1)
AfAm <- as.numeric(Race == 2)
race_other <- as.numeric(Race == 3)
# Physical abilities proxy
phys <- as.factor(PHYS)
levels(phys) <- c("Excellent", "Very good", "Good", "Fair", "Poor")
phys = relevel(phys, ref = "Good")
# Physical health proxy
hlth <- as.factor(WGHT)
levels(hlth) <- c("overweight", "underweight", "normal", "notascertained")
hlth = relevel(hlth, ref = "normal")
marital <- as.factor(MAR)
levels(marital) <- c("never married", "married", "separated", "divorced", "remarried",
"widowed")
marital = relevel(marital, ref = "married")
# 5 yr career expectation parameter <-- career expectations in 1979, those
# who expected to be married (i.e. didn't plan on having a job) := 0,
# otherwise := 1
exp_career <- as.numeric(EXPT.OCC == c(1, 2, 4))
tenure = round(data_NLSY$tenure)
logHours <- log(HRS)
logHours[is.infinite(logHours)] <- NA
logWage <- log(WAG)
logWage[is.infinite((logWage))] <- NA
# occupational expectations to be compared with actual occupation
desr_mngmt <- as.numeric((DES.OCC >= 1) & (DES.OCC <= 43))
desr_professional <- as.numeric((DES.OCC >= 50) & (DES.OCC <= 95))
desr_hard_science <- as.numeric((DES.OCC >= 100) & (DES.OCC <= 156))
desr_technical <- as.numeric((DES.OCC >= 203) & (DES.OCC <= 235))
desr_sales <- as.numeric((DES.OCC >= 243) & (DES.OCC <= 285))
desr_clerical <- as.numeric((DES.OCC >= 303) & (DES.OCC <= 395))
desr_service <- as.numeric((DES.OCC >= 403) & (DES.OCC <= 469))
desr_operatives <- as.numeric((DES.OCC >= 473) & (DES.OCC <= 889))
desr.occ_indx <- as.factor(0 * desr_mngmt + 1 * desr_professional + 2 * desr_hard_science +
3 * desr_technical + 4 * desr_sales + 5 * desr_clerical + 6 * desr_service +
7 * desr_operatives)
levels(desr.occ_indx)[1] <- "desr.mngmt"
levels(desr.occ_indx)[2] <- "desr.prof"
levels(desr.occ_indx)[3] <- "desr.hard science"
levels(desr.occ_indx)[4] <- "desr.technical"
levels(desr.occ_indx)[5] <- "desr.sales"
levels(desr.occ_indx)[6] <- "desr.clerical"
levels(desr.occ_indx)[7] <- "desr.services"
levels(desr.occ_indx)[8] <- "desr.operatives"
print(by(data_NLSY$INC, occ_indx, summary)) # income, occupation comparison
## occ_indx: mngmt
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 35000 50000 64161 75000 279816 43
## --------------------------------------------------------
## occ_indx: prof
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 42000 56000 67061 75000 236000 10
## --------------------------------------------------------
## occ_indx: hard science
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 32000 47000 61558 78000 279816 3
## --------------------------------------------------------
## occ_indx: technical
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1200 27000 42000 46225 62300 236000 6
## --------------------------------------------------------
## occ_indx: sales
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 130 21000 34575 42943 60000 216200 13
## --------------------------------------------------------
## occ_indx: clerical
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 10300 28000 47512 58000 236000 25
## --------------------------------------------------------
## occ_indx: services
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 2500 18000 40679 55000 279816 15
## --------------------------------------------------------
## occ_indx: operatives
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0 15000 31000 37176 48000 236000 21
careers <- as.data.frame(cbind(as.character(occ_indx), as.character(desr.occ_indx)))
colnames(careers) <- c("Actual", "Desired")
careers_tab <- xtabs(~Actual + Desired, careers, addNA = T)
dActual <- rowSums(careers_tab)
dDesired <- colSums(careers_tab)
career_result <- rbind(dActual, dDesired)
barplot(career_result[, -9], col = c("black", "grey"), beside = T, main = "Actual vs. Desired Occupation",
legend.text = c("Actual", "Desired"), ylim = c(0, 960))
data_NLSY = cbind.data.frame(data_NLSY[, c("CaseID", "time", "Age", "Race",
"Sex", "PHYS", "WGHT", "CREDITS", "EXPT.OCC", "TOT.EXP06", "DES.OCC", "tenure",
"INC", "OCC", "IND", "MAR", "SPO", "WAG", "HRS")], occ_indx, desr.occ_indx,
marital, female, hispanic, AfAm, race_other, logHours, logWage, hlth, exp_career)
detach(data_NLSY)
# Tests
coplot(logWage ~ time | CaseID * female, type = "l", col = c("blue", " pink"),
data = data_NLSY, panel = function(x, y, ...) panel.smooth(x, y, span = 0.6,
...))
##
## Missing rows: 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 154, 180, 195, 207, 229, 287, 288, 290, 292, 293, 294, 295, 296, 297, 298, 300, 301, 302, 304, 306, 329, 337, 367, 435, 483, 516, 518, 520, 523, 543, 549, 556, 568, 599, 614, 628, 637, 678, 765, 770, 812, 853, 885, 909, 910, 913, 932, 941, 953, 955, 958, 978, 990, 991, 997, 1001, 1020, 1031, 1103, 1104, 1127, 1128, 1130, 1144, 1160, 1195, 1211, 1230, 1251, 1267, 1276, 1299, 1322, 1330, 1342, 1344, 1349, 1393, 1469, 1471, 1474, 1488, 1493, 1511, 1523, 1531, 1535, 1564, 1603, 1648, 1660, 1664, 1679, 1688, 1715, 1765, 1809, 1811, 1813, 1832, 1870, 1880, 1887, 1892, 1930, 1945, 1964, 1965, 1974, 1978, 1999, 2068, 2076, 2100, 2115, 2126, 2158, 2181, 2185, 2191, 2200, 2218, 2248, 2261, 2267, 2270, 2295, 2297, 2299, 2313, 2334, 2337, 2339, 2351, 2356, 2373, 2390, 2437, 2439, 2441, 2445, 2505, 2531, 2532, 2534, 2544, 2547, 2555, 2558, 2579, 2586
xyplot(logWage ~ time, data = data_NLSY, groups = factor(female, labels = c("M",
"F")), pch = 20, auto.key = T, type = c("p", "g"))
Tests
m.ols_null <- lm(logWage~occ_indx+female)
m.ols_full <- lm(logWage~female+AfAm+hispanic+
phys+hlth + marital+occ_indx+
tenure+I(tenure^2)+data_NLSY$TOT.EXP06+
I(data_NLSY$TOT.EXP06^2)+exp_career+IND+
SPO, data=data_NLSY[,c(1:11,13:19)])
mfixed <- plm(logWage~occ_indx+female, data=data_NLSY,
index=c("CaseID", "time"),
model="within") #F stat < .05, use log wage only
pFtest(mfixed, m.ols_null) # F test p-val < .05; use fixed
##
## F test for individual effects
##
## data: logWage ~ occ_indx + female
## F = 3.179, df1 = 418, df2 = 1515, p-value < 2.2e-16
## alternative hypothesis: significant effects
mrandom <- plm(logWage~occ_indx+female,
data=data_NLSY, index=c("CaseID", "time"),
model="random")
phtest(mfixed, mrandom) # < .05, use fixed
##
## Hausman Test
##
## data: logWage ~ occ_indx + female
## chisq = 94.571, df = 7, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
mfixed.2 <-plm(logWage~female+AfAm+hispanic+phys+
hlth+marital+occ_indx+tenure+I(tenure^2)+
TOT.EXP06+I(TOT.EXP06^2)+
exp_career + IND+SPO,
index=c("CaseID", "time"),
model="within", data=data_NLSY)
coeftest(mfixed.2)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## physExcellent -3.5780e-02 8.0394e-02 -0.4451 0.6564429
## physVery good 1.4246e-02 4.5706e-02 0.3117 0.7553872
## physFair -2.2353e-02 4.7956e-02 -0.4661 0.6413073
## physPoor 3.7020e-02 5.6022e-02 0.6608 0.5089956
## maritalnever married 1.1848e-01 1.5945e-01 0.7431 0.4577542
## maritalseparated 4.4580e-02 5.2263e-01 0.0853 0.9320522
## maritaldivorced 2.0855e-01 2.1427e-01 0.9733 0.3308023
## occ_indxprof -3.9529e-01 6.1773e-02 -6.3990 3.253e-10 ***
## occ_indxhard science -1.2213e-01 7.1596e-02 -1.7058 0.0885878 .
## occ_indxtechnical -7.2264e-02 7.3738e-02 -0.9800 0.3274958
## occ_indxsales -2.7211e-01 1.0646e-01 -2.5561 0.0108427 *
## occ_indxclerical -1.0887e-01 8.4019e-02 -1.2958 0.1955605
## occ_indxservices 2.3526e-02 9.2537e-02 0.2542 0.7994097
## occ_indxoperatives -1.4798e-01 8.3864e-02 -1.7646 0.0781702 .
## exp_career 2.7865e-03 3.8991e-02 0.0715 0.9430524
## IND 2.8894e-04 6.8037e-05 4.2469 2.529e-05 ***
## SPO 3.7542e-06 1.0297e-06 3.6459 0.0002907 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(mfixed.2, vcovHC(mfixed.2, method = "arellano"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## physExcellent -3.5780e-02 6.7786e-02 -0.5278 0.5978114
## physVery good 1.4246e-02 5.0757e-02 0.2807 0.7790635
## physFair -2.2353e-02 5.4844e-02 -0.4076 0.6837363
## physPoor 3.7020e-02 5.1190e-02 0.7232 0.4698574
## maritalnever married 1.1848e-01 2.0705e-01 0.5722 0.5674023
## maritalseparated 4.4580e-02 9.1088e-02 0.4894 0.6247293
## maritaldivorced 2.0855e-01 2.9624e-01 0.7040 0.4817145
## occ_indxprof -3.9529e-01 7.6712e-02 -5.1529 3.535e-07 ***
## occ_indxhard science -1.2213e-01 7.7791e-02 -1.5700 0.1169785
## occ_indxtechnical -7.2264e-02 6.9018e-02 -1.0470 0.2955236
## occ_indxsales -2.7211e-01 1.0795e-01 -2.5207 0.0119825 *
## occ_indxclerical -1.0887e-01 9.7751e-02 -1.1138 0.2658395
## occ_indxservices 2.3526e-02 1.3743e-01 0.1712 0.8641358
## occ_indxoperatives -1.4798e-01 8.5465e-02 -1.7315 0.0838986 .
## exp_career 2.7865e-03 4.2321e-02 0.0658 0.9475268
## IND 2.8894e-04 7.6577e-05 3.7733 0.0001779 ***
## SPO 3.7542e-06 1.4136e-06 2.6558 0.0081327 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp_career_female <- exp_career*female
selection.1 <- selection(selection=exp_career_female~occ_indx + marital,
outcome=logWage~tenure+I(tenure^2)+
data_NLSY$SPO+data_NLSY$TOT.EXP06,
data=data_NLSY[,c(1:11,13:19)],
method="2step" )
probit_sel <- glm(exp_career_female~occ_indx+AfAm+hispanic+
logHours+tenure+I(tenure^2)+data_NLSY$TOT.EXP06+
I(data_NLSY$TOT.EXP06^2)+
data_NLSY$SPO+marital, family="binomial"
(link="probit"), data = data_NLSY[,c(1:11,13:19)])
imr_var <- dnorm(probit_sel$linear.predictors)/pnorm(probit_sel$linear.predictors)
IMR <-cbind(imr_var, exp_career_female) #inverse mill's ratio
outcome1 <- lm(logWage ~ tenure + I(tenure^2)+
data_NLSY$SPO+data_NLSY$TOT.EXP06+
IMR[,1], subset=(exp_career_female==1),
data=data_NLSY[,c(1:11,13:19)])
outcome2 <- lm(logWage~logHours+AfAm+
hispanic+phys+hlth+marital+
occ_indx+tenure+I(tenure^2)+
data_NLSY$TOT.EXP06+
I(data_NLSY$TOT.EXP06^2)+
data_NLSY$SPO+IMR[,1],
subset=(exp_career_female==1),
data=data_NLSY[,c(1:11,13:19)])
star_probit <- stargazer(probit_sel, type="html",
out="tableprobit.html", align=T,
title="Table. 1 Probit Model of Occupational Expectations for Women",
intercept.bottom=F, flip=T)
| Dependent variable: | |
| exp_career_female | |
| Constant | 3.145** |
| (1.529) | |
| occ_indxprof | 0.442*** |
| (0.168) | |
| occ_indxhard science | 0.609*** |
| (0.186) | |
| occ_indxtechnical | 0.339 |
| (0.217) | |
| occ_indxsales | 0.137 |
| (0.280) | |
| occ_indxclerical | 0.210 |
| (0.258) | |
| occ_indxservices | -0.114 |
| (0.319) | |
| occ_indxoperatives | -0.092 |
| (0.259) | |
| AfAm | -0.071 |
| (0.153) | |
| hispanic | 0.213 |
| (0.178) | |
| logHours | -1.194*** |
| (0.395) | |
| tenure | 0.061 |
| (0.375) | |
| I(tenure2) | 0.001 |
| (0.001) | |
| TOT.EXP06 | -0.003 |
| (0.017) | |
| TOT.EXP062) | -0.00000 |
| (0.00000) | |
| SPO | 0.00000* |
| (0.00000) | |
| maritalnever married | 1.153*** |
| (0.245) | |
| maritalseparated | -3.734 |
| (103.654) | |
| maritaldivorced | -0.044 |
| (0.588) | |
| Observations | 951 |
| Log Likelihood | -306.237 |
| Akaike Inf. Crit. | 650.473 |
| Note: | p<0.1; p<0.05; p<0.01 |
star_selection <- stargazer(m.ols_null, m.ols_full,
outcome1, outcome2, selection.1,
type="html", out="tableselection.html",
title="Table. 2 Heckman Model Selection",
column.labels=c("uncorrected", "corrected"),
column.separate=c(2,2), selection.equation=T, single.row=T, dep.var.labels.include=F,
model.names=T, align=T, multicolumn=F)
| Dependent variable: | |||||
| OLS | OLS | OLS | OLS | selection | |
| uncorrected | corrected | ||||
| (1) | (2) | (3) | (4) | (5) | |
| logHours | -0.439 (0.366) | ||||
| occ_indxprof | 0.003 (0.051) | -0.071 (0.060) | 0.065 (0.167) | 0.295** (0.148) | |
| occ_indxhard science | -0.053 (0.062) | -0.118 (0.073) | -0.357* (0.188) | 0.501*** (0.164) | |
| occ_indxtechnical | -0.187*** (0.068) | -0.098 (0.076) | -0.060 (0.226) | 0.320* (0.186) | |
| occ_indxsales | -0.433*** (0.068) | -0.215** (0.102) | -0.589** (0.287) | 0.035 (0.228) | |
| occ_indxclerical | -0.393*** (0.062) | -0.148* (0.089) | -0.206 (0.280) | -0.046 (0.222) | |
| occ_indxservices | -0.517*** (0.065) | 0.079 (0.092) | -0.284 (0.354) | -0.228 (0.260) | |
| occ_indxoperatives | -0.400*** (0.059) | -0.195** (0.078) | -0.205 (0.273) | -0.165 (0.214) | |
| tenure | 0.022 (0.126) | 0.117 (0.316) | 1.072** (0.446) | ||
| I(tenure2) | -0.0004 (0.0003) | -0.00001 (0.00000) | -0.004*** (0.001) | ||
| SPO | 0.00000 (0.00000) | 0.00000 (0.00000) | |||
| TOT.EXP06 | -0.001 (0.006) | -0.005 (0.014) | -0.049** (0.020) | ||
| TOT.EXP062) | 0.00000 (0.00000) | 0.00001*** (0.00000) | |||
| exp_career | -0.028 (0.041) | ||||
| IND | 0.00000 (0.0001) | ||||
| SPO | 0.00000*** (0.00000) | ||||
| female | -0.235*** (0.032) | -0.223*** (0.043) | |||
| AfAm | -0.084* (0.049) | -0.172 (0.183) | |||
| hispanic | -0.016 (0.068) | -0.028 (0.232) | |||
| physExcellent | -0.008 (0.101) | -0.177 (0.306) | |||
| physVery good | -0.107** (0.052) | -0.075 (0.191) | |||
| physFair | -0.006 (0.052) | -0.047 (0.170) | |||
| physPoor | 0.065 (0.061) | 0.045 (0.196) | |||
| hlthoverweight | -0.083* (0.046) | -0.184 (0.184) | |||
| hlthunderweight | -0.159 (0.124) | -0.687 (0.468) | |||
| hlthnotascertained | 0.013 (0.059) | 0.061 (0.268) | |||
| maritalnever married | -0.022 (0.105) | 0.273 (0.195) | -0.562*** (0.132) | ||
| maritalseparated | 0.112 (0.401) | -4.042 (135.430) | |||
| maritaldivorced | -0.025 (0.200) | 0.824 (0.615) | -0.965*** (0.373) | ||
| IMR[, 1] | -0.155 (0.357) | 0.084 (0.226) | |||
| maritalremarried | -3.979 (318.613) | ||||
| Constant | 7.613*** (0.026) | 7.411*** (0.088) | 6.843*** (0.688) | 8.635*** (1.464) | -1.498*** (0.076) |
| Observations | 1,942 | 903 | 130 | 102 | 1,871 |
| R2 | 0.094 | 0.114 | 0.056 | 0.400 | |
| Adjusted R2 | 0.091 | 0.086 | 0.018 | 0.202 | |
| rho | 0.626 | ||||
| Inverse Mills Ratio | 0.422* (0.233) | ||||
| Residual Std. Error | 0.687 (df = 1933) | 0.556 (df = 875) | 1.048 (df = 124) | 0.544 (df = 76) | |
| F Statistic | 25.163*** (df = 8; 1933) | 4.151*** (df = 27; 875) | 1.479 (df = 5; 124) | 2.025** (df = 25; 76) | |
| Note: | p<0.1; p<0.05; p<0.01 | ||||
star_FE <- stargazer(m.ols_null, m.ols_full, mfixed, mfixed.2,
type="html", out="tableFE.html",
title="Table. 3 OLS and Fixed Effects Comparison",
column.labels=c("OLS", "FE"),
column.separate=c(2,2),
single.row=T, dep.var.labels.include=F,
model.names=F, align=T, multicolumn=F)
| Dependent variable: | ||||
| OLS | FE | |||
| (1) | (2) | (3) | (4) | |
| occ_indxprof | 0.003 (0.051) | -0.071 (0.060) | -0.260*** (0.053) | -0.395*** (0.062) |
| occ_indxhard science | -0.053 (0.062) | -0.118 (0.073) | -0.032 (0.062) | -0.122* (0.072) |
| occ_indxtechnical | -0.187*** (0.068) | -0.098 (0.076) | -0.092 (0.067) | -0.072 (0.074) |
| occ_indxsales | -0.433*** (0.068) | -0.215** (0.102) | -0.464*** (0.072) | -0.272** (0.106) |
| occ_indxclerical | -0.393*** (0.062) | -0.148* (0.089) | -0.354*** (0.061) | -0.109 (0.084) |
| occ_indxservices | -0.517*** (0.065) | 0.079 (0.092) | -0.401*** (0.064) | 0.024 (0.093) |
| occ_indxoperatives | -0.400*** (0.059) | -0.195** (0.078) | -0.221*** (0.061) | -0.148* (0.084) |
| tenure | 0.022 (0.126) | |||
| I(tenure2) | -0.0004 (0.0003) | |||
| TOT.EXP06 | -0.001 (0.006) | |||
| TOT.EXP062) | 0.00000 (0.00000) | |||
| exp_career | -0.028 (0.041) | 0.003 (0.039) | ||
| IND | 0.00000 (0.0001) | 0.0003*** (0.0001) | ||
| SPO | 0.00000*** (0.00000) | 0.00000*** (0.00000) | ||
| female | -0.235*** (0.032) | -0.223*** (0.043) | ||
| AfAm | -0.084* (0.049) | |||
| hispanic | -0.016 (0.068) | |||
| physExcellent | -0.008 (0.101) | -0.036 (0.080) | ||
| physVery good | -0.107** (0.052) | 0.014 (0.046) | ||
| physFair | -0.006 (0.052) | -0.022 (0.048) | ||
| physPoor | 0.065 (0.061) | 0.037 (0.056) | ||
| hlthoverweight | -0.083* (0.046) | |||
| hlthunderweight | -0.159 (0.124) | |||
| hlthnotascertained | 0.013 (0.059) | |||
| maritalnever married | -0.022 (0.105) | 0.118 (0.159) | ||
| maritalseparated | 0.112 (0.401) | 0.045 (0.523) | ||
| maritaldivorced | -0.025 (0.200) | 0.209 (0.214) | ||
| Constant | 7.613*** (0.026) | 7.411*** (0.088) | ||
| Observations | 1,942 | 903 | 1,942 | 853 |
| R2 | 0.094 | 0.114 | 0.065 | 0.140 |
| Adjusted R2 | 0.091 | 0.086 | -0.198 | -0.279 |
| Residual Std. Error | 0.687 (df = 1933) | 0.556 (df = 875) | ||
| F Statistic | 25.163*** (df = 8; 1933) | 4.151*** (df = 27; 875) | 15.104*** (df = 7; 1515) | 5.472*** (df = 17; 573) |
| Note: | p<0.1; p<0.05; p<0.01 | |||
#robustness
cov1 <- vcovHC(mfixed.2)
robust.se <- sqrt(diag(cov1))
star_Robust <- stargazer(mfixed.2,title="Table.4 Robust Standard Errors",
type="html", out="robust.html",
single.row=T, se=list(robust.se, NULL))
| Dependent variable: | |
| logWage | |
| physExcellent | -0.036 (0.068) |
| physVery good | 0.014 (0.051) |
| physFair | -0.022 (0.055) |
| physPoor | 0.037 (0.051) |
| maritalnever married | 0.118 (0.207) |
| maritalseparated | 0.045 (0.091) |
| maritaldivorced | 0.209 (0.296) |
| occ_indxprof | -0.395*** (0.077) |
| occ_indxhard science | -0.122 (0.078) |
| occ_indxtechnical | -0.072 (0.069) |
| occ_indxsales | -0.272** (0.108) |
| occ_indxclerical | -0.109 (0.098) |
| occ_indxservices | 0.024 (0.137) |
| occ_indxoperatives | -0.148* (0.085) |
| exp_career | 0.003 (0.042) |
| IND | 0.0003*** (0.0001) |
| SPO | 0.00000*** (0.00000) |
| Observations | 853 |
| R2 | 0.140 |
| Adjusted R2 | -0.279 |
| F Statistic | 5.472*** (df = 17; 573) |
| Note: | p<0.1; p<0.05; p<0.01 |
coeftest(mfixed.2)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## physExcellent -3.5780e-02 8.0394e-02 -0.4451 0.6564429
## physVery good 1.4246e-02 4.5706e-02 0.3117 0.7553872
## physFair -2.2353e-02 4.7956e-02 -0.4661 0.6413073
## physPoor 3.7020e-02 5.6022e-02 0.6608 0.5089956
## maritalnever married 1.1848e-01 1.5945e-01 0.7431 0.4577542
## maritalseparated 4.4580e-02 5.2263e-01 0.0853 0.9320522
## maritaldivorced 2.0855e-01 2.1427e-01 0.9733 0.3308023
## occ_indxprof -3.9529e-01 6.1773e-02 -6.3990 3.253e-10 ***
## occ_indxhard science -1.2213e-01 7.1596e-02 -1.7058 0.0885878 .
## occ_indxtechnical -7.2264e-02 7.3738e-02 -0.9800 0.3274958
## occ_indxsales -2.7211e-01 1.0646e-01 -2.5561 0.0108427 *
## occ_indxclerical -1.0887e-01 8.4019e-02 -1.2958 0.1955605
## occ_indxservices 2.3526e-02 9.2537e-02 0.2542 0.7994097
## occ_indxoperatives -1.4798e-01 8.3864e-02 -1.7646 0.0781702 .
## exp_career 2.7865e-03 3.8991e-02 0.0715 0.9430524
## IND 2.8894e-04 6.8037e-05 4.2469 2.529e-05 ***
## SPO 3.7542e-06 1.0297e-06 3.6459 0.0002907 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1