Final_Project

Setting up environment

packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer", 
              "fst", "viridis", "knitr", "rmarkdown", "ggridges", "viridis", "questionr", "flextable", "effects", "infer") # add any you need here

new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose

## Warning: package 'effects' was built under R version 4.3.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.3.2

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[7]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[12]]
##  [1] "flextable"    "questionr"    "ggridges"     "rmarkdown"    "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[13]]
##  [1] "effects"      "carData"      "flextable"    "questionr"    "ggridges"    
##  [6] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
## [11] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "infer"        "effects"      "carData"      "flextable"    "questionr"   
##  [6] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
## [11] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"

setwd("C:/Users/Erika/Desktop/SOC_202_YAY"
)

library(fst)

ess <- read_fst("All-ESS-Data.fst")

Filtering and recoding

Variables:

Outcome -

Household’s Total Net Income (hinctnta): https://shorturl.at/sEFG0

Household’s Total Net Income (hinctnt): https://shorturl.at/chitY

Predictors -

Gender (gndr): https://shorturl.at/dnrS7

Highest Level of Education (edulvla): https://t.ly/mYdV3

Highest Level of Education (edulvlb): https://t.ly/_dZaa

Iceland_data <- ess %>%
  filter(cntry == "IS")

Iceland_variables <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(9), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  )

library(tidyverse)

Iceland_data <- ess %>%
     filter(cntry == "IS") %>%
     select(hinctnta)

Iceland_data$y <- Iceland_data$hinctnta
table(Iceland_data$y)

## 
##   1   2   3   4   5   6   7   8   9  10  77  88  99 
## 288 249 307 347 342 349 376 326 241 247 140 182   2

Iceland_data$y[Iceland_data$y %in% 77:99] <- NA
table(Iceland_data$y)

## 
##   1   2   3   4   5   6   7   8   9  10 
## 288 249 307 347 342 349 376 326 241 247

mean_y <- mean(Iceland_data$y, na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")

## Mean of 'y' is: 5.461589

median_y <- median(Iceland_data$y, na.rm = TRUE)
cat("Median of 'y' is:", median_y, "\n")

## Median of 'y' is: 6

Iceland_data %>%
     summarize(
         mean_y = mean(y, na.rm = TRUE),
         median_y = median(y, na.rm = TRUE)
     ) %>%
     print()

##     mean_y median_y
## 1 5.461589        6

mode_y <- Iceland_data %>%
    filter(!is.na(y)) %>%
   count(y) %>%
     arrange(desc(n)) %>%
     slice(1) %>%
     pull(y)
cat("\nMode of Y:", mode_y, "\n")

## 
## Mode of Y: 7

sd_y <- sd(Iceland_data$y, na.rm = TRUE)
cat("Standard Deviation of 'y':", sd_y, "\n")

## Standard Deviation of 'y': 2.712026

summary(Iceland_data)

##     hinctnta           y         
##  Min.   : 1.00   Min.   : 1.000  
##  1st Qu.: 4.00   1st Qu.: 3.000  
##  Median : 6.00   Median : 6.000  
##  Mean   :12.89   Mean   : 5.462  
##  3rd Qu.: 8.00   3rd Qu.: 8.000  
##  Max.   :99.00   Max.   :10.000  
##  NA's   :579     NA's   :903

library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following object is masked from 'package:purrr':
## 
##     transpose

Iceland_data <- ess[ess$cntry == "IS", ]
Iceland_data_table <- as.data.table(Iceland_data)

library(modelsummary)

Iceland_data_subset <- Iceland_data %>%
     filter(cntry == "IS") %>%
     mutate(
         edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
        edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
         gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
         hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
         hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
     ) %>%
     select(edulvla, edulvlb, gndr, hinctnta, hinctnt)

datasummary_skim(Iceland_data_subset)

	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
edulvla	4	87	3.3	1.3	2.0	3.0	5.0
edulvlb	15	15	431.2	264.3	0.0	321.0	5555.0
gndr	3	0	1.5	0.7	1.0	2.0	9.0
hinctnta	11	23	5.5	2.7	1.0	6.0	10.0
hinctnt	13	88	8.4	2.2	1.0	9.0	12.0

datasummary_skim(Iceland_data_subset, output = "flextable")

## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
edulvla	4	87	3.3	1.3	2.0	3.0	5.0
edulvlb	15	15	431.2	264.3	0.0	321.0	5555.0
gndr	3	0	1.5	0.7	1.0	2.0	9.0
hinctnta	11	23	5.5	2.7	1.0	6.0	10.0
hinctnt	13	88	8.4	2.2	1.0	9.0	12.0

Iceland_variables <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  ) 
summary_table <- datasummary_skim(Iceland_variables %>% select(edulvla, edulvlb, hinctnta, hinctnt)
, output = "flextable", title = "Table 1. Descriptive Statistics for Main Variables")

## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

summary_table

Table 1. Descriptive Statistics for Main Variables
	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
edulvla	4	87	3.3	1.3	2.0	3.0	5.0
edulvlb	15	15	431.2	264.3	0.0	321.0	5555.0
hinctnta	11	23	5.5	2.7	1.0	6.0	10.0
hinctnt	13	88	8.4	2.2	1.0	9.0	12.0

Iceland_v2 <- Iceland_variables %>%
  rename(
    'Total_Income (round 1-4)' = hinctnt,
    'Total_Income (round 5-10)' = hinctnta,
    'Education_Level (round 1-4)' = edulvla,
    'Education_Level (round 5-10)' = edulvlb,
    'Gender' = gndr,
  )

summary_table_v2 <- datasummary_skim(Iceland_v2 %>% select("Total_Income (round 1-4)", "Total_Income (round 5-10)", "Education_Level (round 1-4)",
                                                           "Education_Level (round 5-10)", "Gender"), output = "flextable", title = "Table 1. Descriptive Statistics for Main Variables")

## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

summary_table_v2

Table 1. Descriptive Statistics for Main Variables
	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
Total_Income (round 1-4)	13	88	8.4	2.2	1.0	9.0	12.0
Total_Income (round 5-10)	11	23	5.5	2.7	1.0	6.0	10.0
Education_Level (round 1-4)	4	87	3.3	1.3	2.0	3.0	5.0
Education_Level (round 5-10)	15	15	431.2	264.3	0.0	321.0	5555.0
Gender	3	0	1.5	0.7	1.0	2.0	9.0

T-Stat

Iceland_data <- ess %>%
  filter(cntry == "IS")

Iceland_data <- Iceland_data %>%
  filter(cntry == "IS") %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA, edulvla),
    edulvlb = ifelse(edulvlb %in% c(7777, 8888, 9999), NA, edulvlb),
    gndr = ifelse(gndr %in% c(77, 88, 99), NA, gndr),
    hinctnta = ifelse(hinctnta %in% c(77, 88, 99), NA, hinctnta),
    hinctnt = ifelse(hinctnt %in% c(77, 88, 99), NA, hinctnt),
  )

Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))

Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))

Iceland_data <- Iceland_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)),
    
    hinctnta = case_when(
      essround >= 4 & hinctnta == 77 ~ NA_real_,
      hinctnta > 10 ~ NA_real_,  # Set values above 10 to NA
      TRUE ~ hinctnta
    ),

    # Set hinctnt to NA where conditions are met, else keep original value
    hinctnt = case_when(
      essround <= 3 & hinctnt == 77 ~ NA_real_,
      hinctnt > 10 ~ NA_real_,  # Set values above 10 to NA
      TRUE ~ hinctnt
    ),

    # Create total_income by merging hinctnta and hinctnt
    total_income = case_when(
      !is.na(hinctnta) ~ hinctnta,  # Use hinctnta if it's not NA
      !is.na(hinctnt) ~ hinctnt,    # Else use hinctnt if it's not NA
      TRUE ~ NA_real_              # Set to NA if both are NA
    )
  )

table(Iceland_data$gndr)

## 
## Female   Male 
##   1982   1890

table(Iceland_data$total_income)

## 
##   1   2   3   4   5   6   7   8   9  10 
## 286 258 314 360 362 366 408 383 371 328

test_stat <- Iceland_data %>%
  specify(explanatory = educ.ba, 
          response = total_income) %>% 
  hypothesize(null = "independence") %>%
  calculate(stat = "t")

## Warning: Removed 447 rows containing missing values.

## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.

print(test_stat$stat)

##        t 
## 14.98089

null_distribution <- Iceland_data %>%
  specify(explanatory = educ.ba,
          response = total_income) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% 
  calculate(stat = "t")

## Warning: Removed 447 rows containing missing values.

## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.

p_val <- null_distribution %>% # 
  get_pvalue(obs_stat = test_stat, direction = "two-sided")

## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.

p_val

## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

With a p-value of 0, we have strong evidence to reject the null.

null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided")

null_distribution

## Response: total_income (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate    stat
##        <int>   <dbl>
##  1         1  0.892 
##  2         2  0.919 
##  3         3  1.69  
##  4         4  0.796 
##  5         5  0.930 
##  6         6 -0.963 
##  7         7 -0.151 
##  8         8 -0.0227
##  9         9 -0.572 
## 10        10  1.74  
## # ℹ 990 more rows

conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")

null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)+
  labs(title = "Figure 1. Simulation-Based Null Distribution",
       x = "Stat",
       y = "Count")

Regression Models and Interaction visual

Regression and GLM Models

Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))

Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))

Here we want to separate data frame since few variables have been changed, which won’t work in the t-test and GLM later. Therefore, we created a data frame and keep a original file.

1)Character copy

Iceland_variables$edulvla[Iceland_variables$edulvla == 55] <- NA  
Iceland_variables$edulvlb[Iceland_variables$edulvlb == 5555] <- NA  

# Recoding to 1 for "BA" and 0 for "No BA"
Iceland_variables$educ.ba <- ifelse(Iceland_variables$essround < 5 & Iceland_data$edulvla == 5, "BA",
                      ifelse(Iceland_variables$essround >= 5 & Iceland_data$edulvlb > 600, "BA", "No BA"))

## Warning in Iceland_variables$essround < 5 & Iceland_data$edulvla == 5: longer
## object length is not a multiple of shorter object length

## Warning in Iceland_variables$essround >= 5 & Iceland_data$edulvlb > 600: longer
## object length is not a multiple of shorter object length

Iceland_variables <- Iceland_data %>% filter(!is.na(educ.ba))
table(Iceland_variables$educ.ba)

## 
##    BA No BA 
##  1332  2551

2)Keep one as Numeric for GLM

Iceland_data$edulvla[Iceland_data$edulvla == 55] <- NA  
Iceland_data$edulvlb[Iceland_data$edulvlb == 5555] <- NA  

# Recoding to 1 for "BA" and 0 for "No BA"
Iceland_data$educ.ba <- ifelse(Iceland_data$essround < 5 & Iceland_data$edulvla == 5, 1,
                      ifelse(Iceland_data$essround >= 5 & Iceland_data$edulvlb > 600, 1, 0))


Iceland_data <- Iceland_data %>% filter(!is.na(educ.ba))
table(Iceland_data$educ.ba)

## 
##    0    1 
## 2551 1332

Here, we ensure we have two dataframes with two different categorical variables using class function.

class(Iceland_data$educ.ba)

## [1] "numeric"

class(Iceland_variables$educ.ba)

## [1] "character"

library(modelsummary)

model1 <- glm(educ.ba ~ total_income, family = binomial, data = Iceland_data)
model2 <- glm(educ.ba ~ total_income + gndr, family = binomial, data = Iceland_data)
model3 <- glm(educ.ba ~ total_income + gndr + total_income*gndr, family = binomial, data = Iceland_data)
modelsummary(
  list(model1, model2, model3))

	(1)	(2)	(3)
(Intercept)	−1.736	−1.466	−1.490
	(0.093)	(0.097)	(0.122)
total_income	0.192	0.202	0.206
	(0.014)	(0.014)	(0.019)
gndrMale		−0.693	−0.634
		(0.075)	(0.192)
total_income × gndrMale			−0.009
			(0.029)
Num.Obs.	3436	3429	3429
AIC	4274.5	4182.5	4184.4
BIC	4286.8	4201.0	4209.0
Log.Lik.	−2135.254	−2088.271	−2088.216
RMSE	0.47	0.46	0.46

modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
    title = 'Table 2. Model Summary and Predictors Interacting')

Table 2. Model Summary and Predictors Interacting
	(1)	(2)	(3)
(Intercept)	−1.7 (0.1)***	−1.5 (0.1)***	−1.5 (0.1)***
total_income	0.2 (0.0)***	0.2 (0.0)***	0.2 (0.0)***
gndrMale		−0.7 (0.1)***	−0.6 (0.2)***
total_income × gndrMale			0.0 (0.0)
Num.Obs.	3436	3429	3429
AIC	4274.5	4182.5	4184.4
BIC	4286.8	4201.0	4209.0
Log.Lik.	−2135.254	−2088.271	−2088.216
RMSE	0.47	0.46	0.46

  coef_omit = "Intercept"

modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  title = 'Table 2. Model Summary and Predictors Interacting',
  coef_omit = "Intercept")

Table 2. Model Summary and Predictors Interacting
	(1)	(2)	(3)
total_income	0.2 (0.0)***	0.2 (0.0)***	0.2 (0.0)***
gndrMale		−0.7 (0.1)***	−0.6 (0.2)***
total_income × gndrMale			0.0 (0.0)
Num.Obs.	3436	3429	3429
AIC	4274.5	4182.5	4184.4
BIC	4286.8	4201.0	4209.0
Log.Lik.	−2135.254	−2088.271	−2088.216
RMSE	0.47	0.46	0.46

interaction_plot <- effect("total_income*gndr", model3, na.rm=TRUE)
plot(interaction_plot,
     main="Figure 2. Interaction Effect of Gender in related to Total Income",
     xlab="Total Income",
     ylab="Difference in Education level")

Iceland_clean <- Iceland_data %>%
  filter(!is.na(gndr) & !is.na(total_income))

Iceland_probs <- Iceland_clean %>%
  count(gndr,total_income) %>%
  group_by(gndr) %>%
  mutate(prob = n / sum(n))

ggplot(Iceland_probs, aes(x = as.factor(total_income), y = prob, color = gndr)) +
  geom_point() +
  geom_line(aes(group = gndr)) +
  labs(title = "Figure 3. Conditional Probabilities of Total Income",
       subtitle = "by Gender difference",
       x = "Income Scale", 
       y = "Education Level") +
  theme_minimal()

Final_Project

2023-12-13

Setting up environment

Filtering and recoding

T-Stat

Regression Models and Interaction visual