The Code

# read the ESS File
ess <- read.fst("All-ESS-Data.fst")

# filter for gb
ess_gb <- ess %>% filter(cntry == "GB")

# subset with variables of interest and data cleaning
# split occupations by their general categories
ess_sub <- ess_gb %>% select(eisced, hinctnta, eiscedp, isco08, isco08p, agea,
      gndr, health, iscoco, iscocop, essround)
ess_sub <- ess_sub %>% mutate(eisced = if_else(eisced %in% c(1:7), eisced, NA),
        hinctnta = if_else(hinctnta %in% c(1:10), hinctnta, NA),
        eiscedp = if_else(eiscedp %in% c(1:7), eiscedp, NA),
        isco08 = case_when(isco08 %in% c(0:999) ~ 0,
                  isco08 %in% c(1000:1999) ~ 1,
                  isco08 %in% c(2000:2999) ~ 2,
                  isco08 %in% c(3000:3999) ~ 3,
                  isco08 %in% c(4000:4999) ~ 4,
                  isco08 %in% c(5000:5999) ~ 5,
                  isco08 %in% c(6000:6999) ~ 6,
                  isco08 %in% c(7000:7999) ~ 7,
                  isco08 %in% c(8000:8999) ~ 8,
                  isco08 %in% c(9000:9999) ~ 9),
        isco08p = case_when(isco08p %in% c(0:999) ~ 0,
                  isco08p %in% c(1000:1999) ~ 1,
                  isco08p %in% c(2000:2999) ~ 2,
                  isco08p %in% c(3000:3999) ~ 3,
                  isco08p %in% c(4000:4999) ~ 4,
                  isco08p %in% c(5000:5999) ~ 5,
                  isco08p %in% c(6000:6999) ~ 6,
                  isco08p %in% c(7000:7999) ~ 7,
                  isco08p %in% c(8000:8999) ~ 8,
                  isco08p %in% c(9000:9999) ~ 9),
        iscoco = case_when(iscoco %in% c(0:999) ~ 0,
                           iscoco %in% c(1000:1999) ~ 1,
                           iscoco %in% c(2000:2999) ~ 2,
                           iscoco %in% c(3000:3999) ~ 3,
                           iscoco %in% c(4000:4999) ~ 4,
                           iscoco %in% c(5000:5999) ~ 5,
                           iscoco %in% c(6000:6999) ~ 6,
                           iscoco %in% c(7000:7999) ~ 7,
                           iscoco %in% c(8000:8999) ~ 8,
                           iscoco %in% c(9000:9999) ~ 9),
        iscocop = case_when(iscocop %in% c(0:999) ~ 0,
                           iscocop %in% c(1000:1999) ~ 1,
                           iscocop %in% c(2000:2999) ~ 2,
                           iscocop %in% c(3000:3999) ~ 3,
                           iscocop %in% c(4000:4999) ~ 4,
                           iscocop %in% c(5000:5999) ~ 5,
                           iscocop %in% c(6000:6999) ~ 6,
                           iscocop %in% c(7000:7999) ~ 7,
                           iscocop %in% c(8000:8999) ~ 8,
                           iscocop %in% c(9000:9999) ~ 9),
        agea = if_else(agea < 999, agea, NA),
        gndr = if_else(gndr %in% c(1:2), gndr, NA),
        health = if_else(health %in% c(1:5), health, NA),
        year = case_when(essround == 1 ~ 2002,
                         essround == 2 ~ 2004,
                         essround == 3 ~ 2006,
                         essround == 4 ~ 2008,
                         essround == 5 ~ 2010,
                         essround == 6 ~ 2012,
                         essround == 7 ~ 2014,
                         essround == 8 ~ 2016,
                         essround == 9 ~ 2018,
                         essround == 10 ~ 2020))

ess_sub <- ess_sub %>% mutate(isco = case_when(!is.na(isco08) ~ isco08,
                                               !is.na(iscoco) ~ iscoco),
                              iscop = case_when(!is.na(isco08p) ~ isco08p,
                                                !is.na(iscocop) ~ iscocop))

# select relevant variables
ess_sub <- na.omit(ess_sub %>% select(eisced, hinctnta, eiscedp, isco, 
          iscop, agea, gndr, health, year))

# mutate new variables to represent different homogamies
ess_sub <- ess_sub %>% mutate(edu_homo = if_else(eisced == eiscedp, 1, 0),
            occu_homo = if_else(isco == iscop, 1, 0))
ess_sub <- ess_sub %>% mutate(both_homo = if_else(edu_homo == 1 & occu_homo == 1,
            1, 0))


model1 <- lm(hinctnta ~ eisced, data = ess_sub)
model2 <- lm(hinctnta ~ eisced + gndr + agea + health, data = ess_sub)
model3 <- lm(hinctnta ~ eisced + gndr + agea + health +
      health*agea + agea*gndr, data = ess_sub)

modelsummary(list(model1, model2, model3))
 (1)   (2)   (3)
(Intercept) 5.225 5.793 6.740
(0.108) (0.259) (0.640)
eisced 0.426 0.422 0.424
(0.022) (0.022) (0.022)
gndr -0.338 -1.172
(0.082) (0.317)
agea 0.016 -0.005
(0.003) (0.013)
health -0.389 -0.198
(0.049) (0.195)
agea × health -0.004
(0.004)
gndr × agea 0.018
(0.007)
Num.Obs. 2987 2987 2987
R2 0.113 0.140 0.143
R2 Adj. 0.113 0.139 0.141
AIC 13270.5 13182.6 13178.1
BIC 13288.6 13218.6 13226.1
Log.Lik. -6632.272 -6585.294 -6581.061
RMSE 2.23 2.19 2.19