# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "interactions") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "interactions" "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"
rm(list=ls()); gc() 
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 2631066 140.6    4405794 235.3         NA  4405794 235.3
## Vcells 4473580  34.2   10146329  77.5      16384  7430444  56.7
ess <- read_fst("All-ESS-Data.fst")

Task 1: Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

germany_data <- read_fst("germany_data.fst")
df <- germany_data
germany_data <- read_fst("germany_data.fst")
df <- germany_data

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

df <- df %>% filter(!is.na(auth))

model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)

modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)
 (1)   (2)
(Intercept) 57.5 (0.7)*** 59.6 (0.9)***
polIDModerate 5.3 (0.7)*** 2.6 (1.1)*
polIDRight 9.3 (1.1)*** 3.4 (1.9)+
religious 0.3 (0.1)** −0.3 (0.2)+
polIDModerate × religious 0.8 (0.2)**
polIDRight × religious 1.5 (0.4)***
Num.Obs. 2855 2855
R2 0.035 0.041
R2 Adj. 0.034 0.039
AIC 24680.0 24665.5
BIC 24709.7 24707.2
Log.Lik. −12334.978 −12325.764
RMSE 17.13 17.13

According to PollDModerate (5.3), the political moderates are 5.3 units ahead of the left (the reference category). According to PollDRight (9.3), the average difference between those who identify as more left-sided and those who identify as right-sided is 9.3 units.Given that the religious predictor is 0.3 (on the authoritarian values scale), an increase of one unit in religiousness will result in a corresponding 0.3 rise in the predicted average.The p-value is less than 0.001, which indicates that there is less than 0.1% chance that the observed result is the result of chance, according to the *** for each coefficient. Erroneously, the null hypothesis is rejected.The adjusted R-squared fit score is 0.034 (less than 1), indicating that there isn’t an optimal predictor in the model.

task 2: Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.

germany_data <- read_fst("germany_data.fst")

interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot
## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     59.61010 58.93398 57.91980 56.90562 56.22950
##   Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
##   Right    63.00456 65.26702 68.66072 72.05442 74.31689

The pollD Left’s falling slope suggests that, in accordance with the Authoritarian views scale, a higher percentage of those who identify as politically left also identify as less religious. The modest upward slope of the PollD moderate scale indicates that those who identify as politically moderate also identify as slightly more religious. Finally, based on the Authoritarian views scale, the pollD Right’s significant upward slope indicates that a higher percentage of those who identify as politically right also hold stronger religious values.

task 3: Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

netherlands_data <- read_fst("netherlands_data.fst")

df <- netherlands_data
df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

table(df$religion)
## 
##    No   Yes 
## 11289  7008
df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)
## 
##  Left Right 
##  5605  7247
df <- df %>%
  mutate(
    
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),

  gen = case_when(
    yrbrn %in% 1900:1945 ~ "1",
    yrbrn %in% 1946:1964 ~ "2",
    yrbrn %in% 1965:1979 ~ "3",
    yrbrn %in% 1980:1996 ~ "4",
    TRUE ~ as.character(yrbrn)  
    ),

gen = factor(gen,
  levels = c("1", "2", "3", "4"),
  labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")),
)

table(df$gen)
## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3967         6359         4684         2803
df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%

  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)

df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)
modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)
religionYes 8.2 (0.9)*** 7.5 (2.1)***
IDRight 3.4 (0.9)*** 3.4 (0.9)***
genBaby Boomers −5.8 (1.3)*** −6.5 (1.7)***
genGen X −7.3 (1.4)*** −7.6 (1.8)***
genMillennials −8.6 (1.5)*** −8.6 (1.9)***
religionYes × genBaby Boomers 1.6 (2.6)
religionYes × genGen X 0.5 (2.8)
religionYes × genMillennials −0.6 (3.2)
Num.Obs. 1147 1147
R2 0.126 0.127
R2 Adj. 0.123 0.121
AIC 9479.3 9484.5
BIC 9514.7 9535.0
Log.Lik. −4732.669 −4732.251
RMSE 14.94 14.94

interpreting the co-efficients

All the coefficients are significant with ***, indicating a p < 0.001. Meaning: There is less than a 0.1% probability that the observed result is due to chance and we incorrectly reject the null, under frequentist assumptions.

religionYes (7.2): When comparing individuals with the same political orientation, the average authoritarian score for individuals who are religious are 7.2 units higher compared to the reference category of not religious.

IDRight (4.2): When comparing individuals with the same religiousness, individuals identifying as politically right are expected to be, on average, 4.2 units higher than the reference category of politically left on the authoritarian values scale.

genBaby Boomers (-5.2): When comparing individuals with the same religiousness, Baby Boomers are expected to be, on average, 5.2 units lower than the reference category of Interwar gen on the authoritarian values scale.

genGen X (-7.7): When comparing individuals with the same religiousness, Gen X are expected to be, on average, 7.7 units lower than the reference category of Interwar gen on the authoritarian values scale.

genMillennials (-8.4): When comparing individuals with the same religiousness, Millennials are expected to be, on average, 8.4 units lower than the reference category of Interwar gen on the authoritarian values scale. Millennials show the strongest decline. ## Interpretation of Fit Metrics Adjusted R-Squared (0.124 vs. 0.125): Model 1 explains 12.4% of variance in the outcome (auth scores) based on our religion and polID variables, which in comparison to model 2 that explains 12.5% is slightly less useful.

AIC (101219.3 vs. 101213.5): Model 1 has a higher AIC than model 2 with a 5.8 point difference, which means model 2 is a better descriptor of models with same dataset, as it has a lower AIC of 101213.5, indicating slightly better fit.

BIC (101271.2 vs. 101287.5): Model 1 also has a lower BIC at 101271.2, which penalizes for any additional parameters added. It suggests that model 1 has slightly good fit.

Log.Lik. (-50602.672 vs. -50596.757): Model 2 has lower Log.Lik. at -50596.757 than model 1, indicating that model 2 is a better fit.

RMSE (15.27 vs. 15.26): Finally, model 1 is 0.01 points higher than model 2, suggesting average magnitude of residuals is slightly higher for model 1, so model 2 is overall a better fit.

task 4: Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)

There is a stronger interaction effect between identifying as right and moderate for political orientation and authoritarian scores for each cohort in Netherlands because each line isn’t parallel. As each cohort progresses, authoritarian scores all decline, however, the decline is steeper for if you identify as right and moderate leaning than for those who identify on the left, with the right experiencing the sharpest decrease. It almost intersects with moderate scores, suggesting cohort has a strong effect on the positions of left and right leaning.

task 5: Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

positively correlated in the Netherlands among both Left- and Right-oriented people, but the association is noticeably larger among the former. This kind of data, which illustrates how various socioeconomic groups may react to religious circumstances under an authoritarian framework, may be helpful for sociological research, political analysis, or policymaking.

END !!