The Code
# read the ESS File
ess <- read.fst("All-ESS-Data.fst")
# filter for gb
ess_gb <- ess %>% filter(cntry == "GB")
# subset with variables of interest and data cleaning
# split occupations by their general categories
ess_sub <- ess_gb %>% select(eisced, hinctnta, eiscedp, isco08, isco08p, agea,
gndr, health, iscoco, iscocop, essround)
ess_sub <- ess_sub %>% mutate(eisced = if_else(eisced %in% c(1:7), eisced, NA),
hinctnta = if_else(hinctnta %in% c(1:10), hinctnta, NA),
eiscedp = if_else(eiscedp %in% c(1:7), eiscedp, NA),
isco08 = case_when(isco08 %in% c(0:999) ~ 0,
isco08 %in% c(1000:1999) ~ 1,
isco08 %in% c(2000:2999) ~ 2,
isco08 %in% c(3000:3999) ~ 3,
isco08 %in% c(4000:4999) ~ 4,
isco08 %in% c(5000:5999) ~ 5,
isco08 %in% c(6000:6999) ~ 6,
isco08 %in% c(7000:7999) ~ 7,
isco08 %in% c(8000:8999) ~ 8,
isco08 %in% c(9000:9999) ~ 9),
isco08p = case_when(isco08p %in% c(0:999) ~ 0,
isco08p %in% c(1000:1999) ~ 1,
isco08p %in% c(2000:2999) ~ 2,
isco08p %in% c(3000:3999) ~ 3,
isco08p %in% c(4000:4999) ~ 4,
isco08p %in% c(5000:5999) ~ 5,
isco08p %in% c(6000:6999) ~ 6,
isco08p %in% c(7000:7999) ~ 7,
isco08p %in% c(8000:8999) ~ 8,
isco08p %in% c(9000:9999) ~ 9),
iscoco = case_when(iscoco %in% c(0:999) ~ 0,
iscoco %in% c(1000:1999) ~ 1,
iscoco %in% c(2000:2999) ~ 2,
iscoco %in% c(3000:3999) ~ 3,
iscoco %in% c(4000:4999) ~ 4,
iscoco %in% c(5000:5999) ~ 5,
iscoco %in% c(6000:6999) ~ 6,
iscoco %in% c(7000:7999) ~ 7,
iscoco %in% c(8000:8999) ~ 8,
iscoco %in% c(9000:9999) ~ 9),
iscocop = case_when(iscocop %in% c(0:999) ~ 0,
iscocop %in% c(1000:1999) ~ 1,
iscocop %in% c(2000:2999) ~ 2,
iscocop %in% c(3000:3999) ~ 3,
iscocop %in% c(4000:4999) ~ 4,
iscocop %in% c(5000:5999) ~ 5,
iscocop %in% c(6000:6999) ~ 6,
iscocop %in% c(7000:7999) ~ 7,
iscocop %in% c(8000:8999) ~ 8,
iscocop %in% c(9000:9999) ~ 9),
agea = if_else(agea < 999, agea, NA),
gndr = if_else(gndr %in% c(1:2), gndr, NA),
health = if_else(health %in% c(1:5), health, NA),
year = case_when(essround == 1 ~ 2002,
essround == 2 ~ 2004,
essround == 3 ~ 2006,
essround == 4 ~ 2008,
essround == 5 ~ 2010,
essround == 6 ~ 2012,
essround == 7 ~ 2014,
essround == 8 ~ 2016,
essround == 9 ~ 2018,
essround == 10 ~ 2020))
ess_sub <- ess_sub %>% mutate(isco = case_when(!is.na(isco08) ~ isco08,
!is.na(iscoco) ~ iscoco),
iscop = case_when(!is.na(isco08p) ~ isco08p,
!is.na(iscocop) ~ iscocop))
# select relevant variables
ess_sub <- na.omit(ess_sub %>% select(eisced, hinctnta, eiscedp, isco,
iscop, agea, gndr, health, year))
# mutate new variables to represent different homogamies
ess_sub <- ess_sub %>% mutate(edu_homo = if_else(eisced == eiscedp, 1, 0),
occu_homo = if_else(isco == iscop, 1, 0))
ess_sub <- ess_sub %>% mutate(both_homo = if_else(edu_homo == 1 & occu_homo == 1,
1, 0))
model1 <- lm(hinctnta ~ eisced, data = ess_sub)
model2 <- lm(hinctnta ~ eisced + gndr + agea + health, data = ess_sub)
model3 <- lm(hinctnta ~ eisced + gndr + agea + health +
health*agea + agea*gndr, data = ess_sub)
modelsummary(list(model1, model2, model3))
|
(1)
|
(2)
|
(3)
|
(Intercept)
|
5.225
|
5.793
|
6.740
|
|
(0.108)
|
(0.259)
|
(0.640)
|
eisced
|
0.426
|
0.422
|
0.424
|
|
(0.022)
|
(0.022)
|
(0.022)
|
gndr
|
|
-0.338
|
-1.172
|
|
|
(0.082)
|
(0.317)
|
agea
|
|
0.016
|
-0.005
|
|
|
(0.003)
|
(0.013)
|
health
|
|
-0.389
|
-0.198
|
|
|
(0.049)
|
(0.195)
|
agea × health
|
|
|
-0.004
|
|
|
|
(0.004)
|
gndr × agea
|
|
|
0.018
|
|
|
|
(0.007)
|
Num.Obs.
|
2987
|
2987
|
2987
|
R2
|
0.113
|
0.140
|
0.143
|
R2 Adj.
|
0.113
|
0.139
|
0.141
|
AIC
|
13270.5
|
13182.6
|
13178.1
|
BIC
|
13288.6
|
13218.6
|
13226.1
|
Log.Lik.
|
-6632.272
|
-6585.294
|
-6581.061
|
RMSE
|
2.23
|
2.19
|
2.19
|