rm(list=ls()); gc()
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526536 28.2    1169674 62.5         NA   669428 35.8
## Vcells 971729  7.5    8388608 64.0      16384  1851813 14.2
# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'aod'
## 
## 
## The following object is masked from 'package:survival':
## 
##     rats
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "kableExtra"   "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "flextable"    "kableExtra"   "interactions" "aod"          "MASS"        
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "aod"         
##  [6] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"

Task 1

Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

germany_data <- read_fst("germany_data.fst")
df <- germany_data
df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))
df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)
modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)
 (1)   (2)
(Intercept) 57.5 (0.7)*** 59.6 (0.9)***
polIDModerate 5.3 (0.7)*** 2.6 (1.1)*
polIDRight 9.3 (1.1)*** 3.4 (1.9)+
religious 0.3 (0.1)** −0.3 (0.2)+
polIDModerate × religious 0.8 (0.2)**
polIDRight × religious 1.5 (0.4)***
Num.Obs. 2855 2855
R2 0.035 0.041
R2 Adj. 0.034 0.039
AIC 24680.0 24665.5
BIC 24709.7 24707.2
Log.Lik. −12334.978 −12325.764
RMSE 17.13 17.13

Interpretation for the MLR without the interaction:

Interpret model fit metrics for both models:

Task 2

Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.

interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot
## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     59.61010 58.93398 57.91980 56.90562 56.22950
##   Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
##   Right    63.00456 65.26702 68.66072 72.05442 74.31689

Interpretation: in the interaction plot of France, lines are not parallel which means that there is an interaction effect. There is intersection between the authoritarian scale, religiousness, and political ideology. Also, both “Moderate” and “Right” show increased slopes while “Left” shows a decreased slope. “Moderate” increases from 62.2 to 66.9; “Right” increases from 63 to 74.3; “Left” decreases from 59.6 to 56.2. Hence, for “Right”, higher levels of religiousness have the most significant effect while for “Left” has the least effect. It shows that the effect of religiousness on the outcome varies depending on the people’s political ideology.

Task 3

Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

netherlands_data <- read_fst("netherlands_data.fst")
netherlands_data <- netherlands_data
netherlands_data <- netherlands_data %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(netherlands_data$gen)
## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3967         6359         4684         2803
netherlands_data <- netherlands_data %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
netherlands_data$auth <- scales::rescale(netherlands_data$behave + 
                      netherlands_data$secure + 
                      netherlands_data$safety + 
                      netherlands_data$tradition + 
                      netherlands_data$rules, to=c(0,100), na.rm=TRUE)


netherlands_data <- netherlands_data %>% filter(!is.na(auth))
netherlands_data <- netherlands_data %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

# check
table(netherlands_data$religion)
## 
##    No   Yes 
## 10913  6750
netherlands_data <- netherlands_data %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(netherlands_data$ID)
## 
##  Left Right 
##  5449  7010
model5 <- lm(auth ~ religion + ID + gen, data = netherlands_data, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = netherlands_data, weights = weight)
modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)
religionYes 8.2 (0.9)*** 7.5 (2.1)***
IDRight 3.4 (0.9)*** 3.4 (0.9)***
genBaby Boomers −5.8 (1.3)*** −6.5 (1.7)***
genGen X −7.3 (1.4)*** −7.6 (1.8)***
genMillennials −8.6 (1.5)*** −8.6 (1.9)***
religionYes × genBaby Boomers 1.6 (2.6)
religionYes × genGen X 0.5 (2.8)
religionYes × genMillennials −0.6 (3.2)
Num.Obs. 1147 1147
R2 0.126 0.127
R2 Adj. 0.123 0.121
AIC 9479.3 9484.5
BIC 9514.7 9535.0
Log.Lik. −4732.669 −4732.251
RMSE 14.94 14.94

Interpretation for the MLR without the interaction:

Interpret model fit metrics for both models:

Task 4

Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

netherlands_data <- netherlands_data %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = netherlands_data, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)

Interpretation: in the interaction plot of Netherlands, although “Left” and “Moderate” are parallel, “Right” intersects between them. Hence, there’s likely an interaction between the authoritarian scale, cohort, “Left”, “Moderate”, and “Right.” On the other hand, comparing their sloped, “Right” is the most tilted, and “Moderate” is more tilted than “Left.” “Right” decreases from over 70 to under 60; “Moderate” decreases from between 65 and 70 to almost 55; “Left” decreases from just over 60 to almost 55. It shows that the authoritarian scale of “Right” changes the most and “Left” changes the least.

Task 5

Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

netherlands_data <- netherlands_data
model8 <- lm(auth ~ religious + ID + religious*ID, data = netherlands_data, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

Interpretation: in the interaction plot of Netherlands, “Left” and “Right” are parallel. It shows that they don’t have ant interaction effect between the authoritarian scale, religious, “Left”, and “Right.” Besides, comparing their slope, they have similar slopes. They most likely tilt and increase the same. “Left” increases from around 55 to almost 70, and “Right” increases from around 60 to over 70.

End