Setting up environment

packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer", 
              "fst", "viridis", "knitr", "rmarkdown", "ggridges", "viridis", "questionr", "flextable", "effects", "infer") # add any you need here

new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## Warning: package 'effects' was built under R version 4.3.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.2
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[7]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[12]]
##  [1] "flextable"    "questionr"    "ggridges"     "rmarkdown"    "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[13]]
##  [1] "effects"      "carData"      "flextable"    "questionr"    "ggridges"    
##  [6] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
## [11] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "infer"        "effects"      "carData"      "flextable"    "questionr"   
##  [6] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
## [11] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"
setwd("C:/Users/Erika/Desktop/SOC_202_YAY"
)

library(fst) 
ess <- read_fst("All-ESS-Data.fst")

Filtering and recoding

Variables:

Outcome -

Household’s Total Net Income (hinctnta): https://shorturl.at/sEFG0

Household’s Total Net Income (hinctnt): https://shorturl.at/chitY

Predictors -

Gender (gndr): https://shorturl.at/dnrS7

Highest Level of Education (edulvla): https://t.ly/mYdV3

Highest Level of Education (edulvlb): https://t.ly/_dZaa

Iceland_data <- ess %>%
  filter(cntry == "IS")
Iceland_variables <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(9), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  ) 
library(tidyverse)

Iceland_data <- ess %>%
     filter(cntry == "IS") %>%
     select(hinctnta)

Iceland_data$y <- Iceland_data$hinctnta
table(Iceland_data$y)
## 
##   1   2   3   4   5   6   7   8   9  10  77  88  99 
## 288 249 307 347 342 349 376 326 241 247 140 182   2
Iceland_data$y[Iceland_data$y %in% 77:99] <- NA
table(Iceland_data$y)
## 
##   1   2   3   4   5   6   7   8   9  10 
## 288 249 307 347 342 349 376 326 241 247
mean_y <- mean(Iceland_data$y, na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")
## Mean of 'y' is: 5.461589
median_y <- median(Iceland_data$y, na.rm = TRUE)
cat("Median of 'y' is:", median_y, "\n")
## Median of 'y' is: 6
Iceland_data %>%
     summarize(
         mean_y = mean(y, na.rm = TRUE),
         median_y = median(y, na.rm = TRUE)
     ) %>%
     print()
##     mean_y median_y
## 1 5.461589        6
mode_y <- Iceland_data %>%
    filter(!is.na(y)) %>%
   count(y) %>%
     arrange(desc(n)) %>%
     slice(1) %>%
     pull(y)
cat("\nMode of Y:", mode_y, "\n")
## 
## Mode of Y: 7
sd_y <- sd(Iceland_data$y, na.rm = TRUE)
cat("Standard Deviation of 'y':", sd_y, "\n")
## Standard Deviation of 'y': 2.712026
summary(Iceland_data)
##     hinctnta           y         
##  Min.   : 1.00   Min.   : 1.000  
##  1st Qu.: 4.00   1st Qu.: 3.000  
##  Median : 6.00   Median : 6.000  
##  Mean   :12.89   Mean   : 5.462  
##  3rd Qu.: 8.00   3rd Qu.: 8.000  
##  Max.   :99.00   Max.   :10.000  
##  NA's   :579     NA's   :903
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
Iceland_data <- ess[ess$cntry == "IS", ]
Iceland_data_table <- as.data.table(Iceland_data)
library(modelsummary)

Iceland_data_subset <- Iceland_data %>%
     filter(cntry == "IS") %>%
     mutate(
         edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
        edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
         gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
         hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
         hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
     ) %>%
     select(edulvla, edulvlb, gndr, hinctnta, hinctnt)

datasummary_skim(Iceland_data_subset)
Unique (#) Missing (%) Mean SD Min Median Max
edulvla 4 87 3.3 1.3 2.0 3.0 5.0
edulvlb 15 15 431.2 264.3 0.0 321.0 5555.0
gndr 3 0 1.5 0.7 1.0 2.0 9.0
hinctnta 11 23 5.5 2.7 1.0 6.0 10.0
hinctnt 13 88 8.4 2.2 1.0 9.0 12.0
datasummary_skim(Iceland_data_subset, output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

edulvla

4

87

3.3

1.3

2.0

3.0

5.0

edulvlb

15

15

431.2

264.3

0.0

321.0

5555.0

gndr

3

0

1.5

0.7

1.0

2.0

9.0

hinctnta

11

23

5.5

2.7

1.0

6.0

10.0

hinctnt

13

88

8.4

2.2

1.0

9.0

12.0

Iceland_variables <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  ) 
summary_table <- datasummary_skim(Iceland_variables %>% select(edulvla, edulvlb, hinctnta, hinctnt)
, output = "flextable", title = "Table 1. Descriptive Statistics for Main Variables")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.
summary_table
Table 1. Descriptive Statistics for Main Variables

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

edulvla

4

87

3.3

1.3

2.0

3.0

5.0

edulvlb

15

15

431.2

264.3

0.0

321.0

5555.0

hinctnta

11

23

5.5

2.7

1.0

6.0

10.0

hinctnt

13

88

8.4

2.2

1.0

9.0

12.0

Iceland_v2 <- Iceland_variables %>%
  rename(
    'Total_Income (round 1-4)' = hinctnt,
    'Total_Income (round 5-10)' = hinctnta,
    'Education_Level (round 1-4)' = edulvla,
    'Education_Level (round 5-10)' = edulvlb,
    'Gender' = gndr,
  )

summary_table_v2 <- datasummary_skim(Iceland_v2 %>% select("Total_Income (round 1-4)", "Total_Income (round 5-10)", "Education_Level (round 1-4)",
                                                           "Education_Level (round 5-10)", "Gender"), output = "flextable", title = "Table 1. Descriptive Statistics for Main Variables")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.
summary_table_v2
Table 1. Descriptive Statistics for Main Variables

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

Total_Income (round 1-4)

13

88

8.4

2.2

1.0

9.0

12.0

Total_Income (round 5-10)

11

23

5.5

2.7

1.0

6.0

10.0

Education_Level (round 1-4)

4

87

3.3

1.3

2.0

3.0

5.0

Education_Level (round 5-10)

15

15

431.2

264.3

0.0

321.0

5555.0

Gender

3

0

1.5

0.7

1.0

2.0

9.0

T-Stat

Iceland_data <- ess %>%
  filter(cntry == "IS")
Iceland_data <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  ) 
Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))

Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))
Iceland_data <- Iceland_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)),
    
    hinctnta = case_when(
      essround >= 4 & hinctnta == 77 ~ NA_real_,
      hinctnta > 10 ~ NA_real_,  # Set values above 10 to NA
      TRUE ~ hinctnta
    ),

    # Set hinctnt to NA where conditions are met, else keep original value
    hinctnt = case_when(
      essround <= 3 & hinctnt == 77 ~ NA_real_,
      hinctnt > 10 ~ NA_real_,  # Set values above 10 to NA
      TRUE ~ hinctnt
    ),

    # Create total_income by merging hinctnta and hinctnt
    total_income = case_when(
      !is.na(hinctnta) ~ hinctnta,  # Use hinctnta if it's not NA
      !is.na(hinctnt) ~ hinctnt,    # Else use hinctnt if it's not NA
      TRUE ~ NA_real_              # Set to NA if both are NA
    )
  )

table(Iceland_data$gndr)
## 
## Female   Male 
##   1982   1890
table(Iceland_data$total_income)
## 
##   1   2   3   4   5   6   7   8   9  10 
## 286 258 314 360 362 366 408 383 371 328
test_stat <- Iceland_data %>%
  specify(explanatory = educ.ba, 
          response = total_income) %>% 
  hypothesize(null = "independence") %>%
  calculate(stat = "t")
## Warning: Removed 447 rows containing missing values.
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
print(test_stat$stat)
##        t 
## 14.98089
null_distribution <- Iceland_data %>%
  specify(explanatory = educ.ba,
          response = total_income) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% 
  calculate(stat = "t")
## Warning: Removed 447 rows containing missing values.
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
p_val <- null_distribution %>% # 
  get_pvalue(obs_stat = test_stat, direction = "two-sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

With a p-value of 0, we have strong evidence to reject the null.

null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided")

null_distribution
## Response: total_income (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate    stat
##        <int>   <dbl>
##  1         1  0.892 
##  2         2  0.919 
##  3         3  1.69  
##  4         4  0.796 
##  5         5  0.930 
##  6         6 -0.963 
##  7         7 -0.151 
##  8         8 -0.0227
##  9         9 -0.572 
## 10        10  1.74  
## # ℹ 990 more rows
conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")

null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)+
  labs(title = "Figure 1. Simulation-Based Null Distribution",
       x = "Stat",
       y = "Count")

Regression Models and Interaction visual

Regression and GLM Models

Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))

Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))

Here we want to separate data frame since few variables have been changed, which won’t work in the t-test and GLM later. Therefore, we created a data frame and keep a original file.

1)Character copy

Iceland_variables$edulvla[Iceland_variables$edulvla == 55] <- NA  
Iceland_variables$edulvlb[Iceland_variables$edulvlb == 5555] <- NA  

# Recoding to 1 for "BA" and 0 for "No BA"
Iceland_variables$educ.ba <- ifelse(Iceland_variables$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_variables$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))
## Warning in Iceland_variables$essround < 5 & Iceland_data$edulvla == 5: longer
## object length is not a multiple of shorter object length
## Warning in Iceland_variables$essround >= 5 & Iceland_data$edulvlb > 600: longer
## object length is not a multiple of shorter object length
Iceland_variables <- Iceland_data %>% filter(!is.na(educ.ba))
table(Iceland_variables$educ.ba)
## 
##    BA No BA 
##  1332  2551

2)Keep one as Numeric for GLM

Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

# Recoding to 1 for "BA" and 0 for "No BA"
Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, 1,
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, 1, 0))


Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))
table(Iceland_data$educ.ba)
## 
##    0    1 
## 2551 1332

Here, we ensure we have two dataframes with two different categorical variables using class function.

class(Iceland_data$educ.ba)
## [1] "numeric"
class(Iceland_variables$educ.ba)
## [1] "character"
library(modelsummary)

model1 <- glm(educ.ba ~ total_income, family = binomial, data = Iceland_data)
model2 <- glm(educ.ba ~ total_income + gndr, family = binomial, data = Iceland_data)
model3 <- glm(educ.ba ~ total_income + gndr + total_income*gndr, family = binomial, data = Iceland_data)
modelsummary(
  list(model1, model2, model3))
 (1)   (2)   (3)
(Intercept) −1.736 −1.466 −1.490
(0.093) (0.097) (0.122)
total_income 0.192 0.202 0.206
(0.014) (0.014) (0.019)
gndrMale −0.693 −0.634
(0.075) (0.192)
total_income × gndrMale −0.009
(0.029)
Num.Obs. 3436 3429 3429
AIC 4274.5 4182.5 4184.4
BIC 4286.8 4201.0 4209.0
Log.Lik. −2135.254 −2088.271 −2088.216
RMSE 0.47 0.46 0.46
modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
    title = 'Table 2. Model Summary and Predictors Interacting')
Table 2. Model Summary and Predictors Interacting
 (1)   (2)   (3)
(Intercept) −1.7 (0.1)*** −1.5 (0.1)*** −1.5 (0.1)***
total_income 0.2 (0.0)*** 0.2 (0.0)*** 0.2 (0.0)***
gndrMale −0.7 (0.1)*** −0.6 (0.2)***
total_income × gndrMale 0.0 (0.0)
Num.Obs. 3436 3429 3429
AIC 4274.5 4182.5 4184.4
BIC 4286.8 4201.0 4209.0
Log.Lik. −2135.254 −2088.271 −2088.216
RMSE 0.47 0.46 0.46
  coef_omit = "Intercept"
modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  title = 'Table 2. Model Summary and Predictors Interacting',
  coef_omit = "Intercept")
Table 2. Model Summary and Predictors Interacting
 (1)   (2)   (3)
total_income 0.2 (0.0)*** 0.2 (0.0)*** 0.2 (0.0)***
gndrMale −0.7 (0.1)*** −0.6 (0.2)***
total_income × gndrMale 0.0 (0.0)
Num.Obs. 3436 3429 3429
AIC 4274.5 4182.5 4184.4
BIC 4286.8 4201.0 4209.0
Log.Lik. −2135.254 −2088.271 −2088.216
RMSE 0.47 0.46 0.46
interaction_plot <- effect("total_income*gndr", model3, na.rm=TRUE)
plot(interaction_plot,
     main="Figure 2. Interaction Effect of Gender in related to Total Income",
     xlab="Total Income",
     ylab="Difference in Education level")

Iceland_clean <- Iceland_data %>%
  filter(!is.na(gndr) & !is.na(total_income))

Iceland_probs <- Iceland_clean %>%
  count(gndr,total_income) %>%
  group_by(gndr) %>%
  mutate(prob = n / sum(n))

ggplot(Iceland_probs, aes(x = as.factor(total_income), y = prob, color = gndr)) +
  geom_point() +
  geom_line(aes(group = gndr)) +
  labs(title = "Figure 3. Conditional Probabilities of Total Income",
       subtitle = "by Gender difference",
       x = "Income Scale", 
       y = "Education Level") +
  theme_minimal()