# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'aod'
## 
## 
## The following object is masked from 'package:survival':
## 
##     rats
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "kableExtra"   "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "flextable"    "kableExtra"   "interactions" "aod"          "MASS"        
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "aod"         
##  [6] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"

task 1 Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

germany_data <- read_fst("germany_data.fst")
df <- germany_data
df$weight <- df$dweight * df$pweight
survey_design <- svydesign(ids = ~1, data = df, weights = ~weight)
df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))
df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)
modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)
 (1)   (2)
(Intercept) 55.3 (0.3)*** 56.4 (0.3)***
polIDModerate 4.6 (0.3)*** 3.4 (0.4)***
polIDRight 7.7 (0.4)*** 4.2 (0.7)***
religious 0.9 (0.0)*** 0.6 (0.1)***
polIDModerate × religious 0.3 (0.1)***
polIDRight × religious 0.8 (0.1)***
Num.Obs. 23373 23373
R2 0.048 0.050
R2 Adj. 0.048 0.050
AIC 200594.1 200558.6
BIC 200634.4 200615.0
Log.Lik. −100292.049 −100272.298
RMSE 17.27 17.24

answer: The analysis reveals that political right-leaning individuals score significantly higher on authoritarian values compared to their left-leaning counterparts, even with similar levels of religious commitment. Additionally, each incremental increase in religious commitment within the same ideological group corresponds to a 0.9-point rise in authoritarian values, with highly significant statistical support.

task 2 Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.

interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot
## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     56.40706 57.56775 59.30879 61.04983 62.21053
##   Moderate 59.85063 61.66176 64.37844 67.09513 68.90626
##   Right    60.60237 63.35032 67.47224 71.59416 74.34211

answer: The analysis shows distinct trends in religiousness across different political ideologies. Left-leaning individuals exhibit slightly higher religiousness, while moderates display even higher levels compared to the left-leaning group. However, right-leaning individuals show substantially elevated levels of religiousness compared to both left-leaning and moderate perspectives.

Task 3

Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

netherlands_data <- read_fst("netherlands_data.fst")
df <- netherlands_data
df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))
df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)
## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3771         6147         4548         2730
  df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

# check
table(df$religion)
## 
##    No   Yes 
## 10913  6750
df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)
## 
##  Left Right 
##  5449  7010
model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)
modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)
religionYes 8.2 (0.9)*** 7.5 (2.1)***
IDRight 3.4 (0.9)*** 3.4 (0.9)***
genBaby Boomers −5.8 (1.3)*** −6.5 (1.7)***
genGen X −7.3 (1.4)*** −7.6 (1.8)***
genMillennials −8.6 (1.5)*** −8.6 (1.9)***
religionYes × genBaby Boomers 1.6 (2.6)
religionYes × genGen X 0.5 (2.8)
religionYes × genMillennials −0.6 (3.2)
Num.Obs. 1147 1147
R2 0.126 0.127
R2 Adj. 0.123 0.121
AIC 9479.3 9484.5
BIC 9514.7 9535.0
Log.Lik. −4732.669 −4732.251
RMSE 14.94 14.94

answer:Affiliation with religion corresponds to a 7.2 unit increase in the dependent variable compared to non-affiliation. Each unit increase in IDRight leads to a 4.2 unit increase, while being a Baby Boomer is associated with a decrease of 5.2 units compared to non-Baby Boomers. Generation X individuals see a decrease of 7.7 units, and Millennials experience an 8.4 unit decrease compared to non-members of their respective generations. The model without interaction terms explains 12.4% of the dependent variable’s variance, while the inclusion of interaction terms slightly improves explanatory capability by 0.1% to 12.5%.

task 4 Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)

answer: The graph depicts intersections between the lines representing right and moderate political ideologies, indicating an interaction between these variables. Additionally, a slight intersection with these lines occurs for the political left, suggesting an interaction between cohort and political ideology among left-leaning individuals.

task 5 Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

answer: The graph shows parallel lines for both right and left political ideologies, indicating no intersection and thus no interaction between political ideology and religiousness on the authoritarian scale.