Tutorial 8 (222)

Initial set-up

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## Warning: package 'infer' was built under R version 4.3.3

## Warning: package 'effects' was built under R version 4.3.3

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.3.3

## lattice theme set by effectsTheme()
## See ?effectsTheme for details.

## Warning: package 'survey' was built under R version 4.3.3

## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select

## Warning: package 'aod' was built under R version 4.3.3

## 
## Attaching package: 'aod'
## 
## The following object is masked from 'package:survival':
## 
##     rats

## Warning: package 'interactions' was built under R version 4.3.3

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Attaching package: 'flextable'
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "kableExtra"   "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "flextable"    "kableExtra"   "interactions" "aod"          "MASS"        
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "aod"         
##  [6] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"

Loading dataset

france_data <- read_fst("france_data.fst")

Renaming dataset to df.

df <- france_data

Note: you have both df and france_data which contain the french data (same observations, under different names). So you can always revert back to france_data if needed

Survey weights

From ess website:

For data collection the ess uses strictly probability-based samples. Every element in the ess target population should therefore have a greater than zero probability of being included into the sample. When analysing ess data estimates, the likelihood of each respondent to be part of the sample should also be taken into account - which means that the most accurate estimates will be obtained only after weighting the data.

Link: https://www.europeansocialsurvey.org/methodology/ess-methodology/data-processing-and-archiving/weighting

df$weight <- df$dweight * df$pweight
survey_design <- svydesign(ids = ~1, data = df, weights = ~weight)

Multiple linear regression

We will work with our populist attitudes scale based on Norris and Inglehart’s conceptualization and measurement again. So let’s run our code:

Setting up our populist scale

# Setting values greater than 10 to NA
df$trstplt <- ifelse(df$trstplt > 10, NA, df$trstplt)
df$trstprl <- ifelse(df$trstprl > 10, NA, df$trstprl)
df$trstprt <- ifelse(df$trstprt > 10, NA, df$trstprt)

# Creating and rescaling the trust variable
df$trust <- scales::rescale(df$trstplt + df$trstprl + df$trstprt, na.rm = TRUE, to = c(0, 100))

# Here's how you would "flip it" because populist is conceived by N + I as 'anti-trust'
# As Schafer explains there are some issues with that conceptualization, but N + I is also widely applied
df$populist <- scales::rescale(df$trust, na.rm = TRUE, to=c(100,0))

Now, let’s suppose we read literature about the importance of social capital, political interest, and generational trends for understanding populism.

Let’s recode the variables and see if that is supported.

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    # Recoding generational cohorts based on the year of birth (yrbrn).
    # The year of birth is categorized into different generational cohorts.
    # Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
    # The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3999         6351         4821         3308

# The dataframe is further updated to recode trust and political interest # note: you could have done it one, integrating above, but I broke it down
df <- df %>%
  mutate(
    # Recoding the 'ppltrst' variable, which seems to measure trust on a scale from 0 to 10.
    # Special codes like 77, 88, 99 are probably used for missing data and are set to NA.
    # Trust levels are categorized as 'Low', 'Mid', and 'High'.
    Capital = case_when(
      ppltrst %in% c(77, 88, 99) ~ NA_character_,
      ppltrst >= 0 & ppltrst <= 3 ~ "Low",
      ppltrst >= 4 & ppltrst <= 6 ~ "Mid",
      ppltrst >= 7 & ppltrst <= 10 ~ "High",
      TRUE ~ as.character(ppltrst)
    ),
    # Recoding 'polintr' variable, which seems to measure political interest.
    # Values of 1 and 2 indicate 'Interested', 3 and 4 'Not Interested'.
    # Special codes like 7, 8, 9 (likely missing data) are set to NA.
    Politically = case_when(
      polintr %in% c(1, 2) ~ "Interested",
      polintr %in% c(3, 4) ~ "Not Interested",
      polintr %in% c(7, 8, 9) ~ NA_character_
    )
  )

Running models

SLR vs. MLR

We will run a simple linear regression (one predictor) and multiple linear regression (two predictors) to compare the output.

Remember, it is lm() for linear regression model, then in the parentheses(outcome ~ predictor1 + predictor2…, data = dataname)

model1 <- lm(populist ~ gen, data = df, weights = weight)
model2 <- lm(populist ~ Capital + Politically, data = df, weights = weight)
modelsummary(
  list(model1, model2), # you can add as many models as you have generated by putting them in this list. But you MUST refer to them to their stored name. So if you named your model say Regression1, you would have to put that instead of model1. By default, you can use the naming convention we rely upon of model1, model2, ...
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}", # what we are specifying here is what we want to show in our model summary table
                  "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	62.6 (0.4)***	52.7 (0.3)***
genBaby Boomers	2.6 (0.4)***
genGen X	3.3 (0.5)***
genMillennials	1.6 (0.5)***
CapitalLow		16.0 (0.4)***
CapitalMid		6.7 (0.4)***
PoliticallyNot Interested		6.3 (0.3)***
Num.Obs.	16588	17077
R2	0.003	0.124
R2 Adj.	0.003	0.123
AIC	147175.4	149264.0
BIC	147214.0	149302.8
Log.Lik.	−73582.705	−74627.015
RMSE	19.41	18.19

All the coefficients are significant with ***, indicating a p < 0.001. Meaning: There is less than a 0.1% probability that the observed result is due to chance and we incorrectly reject the null, under frequentist assumptions.

What we note here is that the R-squared and adjusted R-squared are noticeably higher for the second model – put differently, the first model does not explain much variance in the outcome. However, the AIC and BIC are higher for the second model, highlighting that the increase in variance explained is not justified (there is greater complexity or potential underlying issues that the AIC and BIC do not like, comparatively). In sum: it suggests issues with the second model. This is associated to conceptual overlap in what our proxy for social capital captures (trust in other people, and our populist scale which are made of trust items) as well as other issues we will look into in the next tutorial.

The takeaway is that you cannot just look at significance stars, nor just the R2 and R2 adj. You have to think about your models and interpret all the output.

MLR and interactions

Let’s turn to modelling something else. We will use the authoritarian scale, again as conceptualized by Norris and Inglehart.

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))

# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

Let’s add some covariates of interest, which will be our previously recoded lrscale with three categories (left/moderate/right), as well as a cleaned version of ‘how religious’ respondents are (from 0-10). Here, we are testing ideas related to the political right and being more religious as associated to authoritarian attitudes.

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

Note: In our code above we renamed our variables polID and religious. You can always name variables to whatever you prefer (before the = case_when), notably to more easily track what the variables are and not make typos if you write them out in your models.

Modelling

We will use modelsummary here and specify two models (MLR and MLR with interaction). We will look into the possibility of an interaction between religiousness and ideological self-placement as left, moderate, and right.

First, let’s run our models.

model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)

Next, let’s use the modelsummary function to generate our table. Note: remember, as we did in the previous tutorial, the finalized version should have a title and coefficients renamed in terms of how they are displayed. By default you have the variable name + category (when it’s a categorical variable). So, for example, polIDRight as a listed coefficient.

modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	48.7 (0.3)***	47.8 (0.4)***
polIDModerate	3.9 (0.3)***	4.9 (0.5)***
polIDRight	8.1 (0.4)***	10.1 (0.7)***
religious	1.6 (0.0)***	1.9 (0.1)***
polIDModerate × religious		−0.3 (0.1)**
polIDRight × religious		−0.5 (0.1)***
Num.Obs.	17049	17049
R2	0.117	0.117
R2 Adj.	0.116	0.117
AIC	148261.6	148248.3
BIC	148300.3	148302.5
Log.Lik.	−74125.790	−74117.141
RMSE	17.85	17.85

Let’s first interpret the MLR coefficient output before turning to a discussion of the interaction output, which we will also visualize.

polIDModerate (3.9): This coefficient suggests that, comparing individuals with the same religiousness (i.e., what some term “holding constant”), the average outcome for individuals identifying as politically moderate are 3.9 units higher compared to the reference category (in our case, identifying ideologically as on the ‘left’). The “baseline group” refers to the category that is omitted, and is thus interpreted as part of our intercept.
polIDRight (8.1): This coefficient indicates that, comparing individuals with the same religiousness, individuals identifying as politically right are expected to be, on average, 8.1 units higher than the reference category (i.e., identifying ideologically as on the ‘left’) on the authoritarian values scale.
religious (1.6): For the ‘religious’ predictor, a coefficient of 1.6 means that with every one-unit increase in religiousness, there is an associated expected average increase of 1.6 units on the authoritarian values scale, assuming that we are comparing people with the same ideological categorization (i.e., holding ideological self-placement constant).

All three coefficients have ***, indicating a p < 0.001. Meaning: There is less than a 0.1% probability that the observed result is due to chance and we incorrectly reject the null, under frequentist assumptions.

Additional note: When interpreting the intercept (48.7 in the MLR), it represents the expected value of the outcome variable when all the predictor variables are set to their reference levels (so polID as ‘left’, in our case, although you can change that) and, for continuous variables, when they are set to zero (so religiousness as 0, in our case). For categorical variables, the reference level is the one that is left out of the model – i.e., you would not see it displayed in your regression table.

Let’s talk about interactions

What do you note in terms of the model fit metrics above, including the R-squared?

That’s right, not much changes.

We do observe that the interaction coefficients are statistically significant but the coefficients themselves are really small. What does it all mean?

Let’s visualize and use our rule of thumb we discussed in the lecture to explore.

As a reminder:

–> if lines are parallel = no interaction effect

–> if lines are not parallel = interaction effect

interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     47.83397 51.61435 57.28491 62.95547 66.73585
##   Moderate 52.73980 55.91746 60.68395 65.45044 68.62810
##   Right    57.94686 60.75180 64.95921 69.16663 71.97157

What’s your takeaway? Remember: they do not need to start and end at the same points, it’s about comparing the slopes and if you see one having a discernably not parallel trend.

Let’s showcase an interaction where the lines are less parallel, relatively speaking.

This will also help us emphasize an additional point. There are many decisions that go into modelling. Below, instead of religious, we will use a predictor about whether the respondent belongs to a particular religion or not, and recode lrscale not as left/moderate/right but as left and right, omitting 5. These are all decisions that would you need to be explained and justified. In all quantitative modelling, there are many researcher degrees of freedom, including how we choose to handle variables. These need to be considered, made transparent, and justified. Always.

df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

# check
table(df$religion)

## 
##   No  Yes 
## 9071 9102

df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  6308  5704

model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)

Before we visualize, what do you note about the interaction coefficients and the model fit metrics?

Now, let’s visualize:

interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  62.50524     55.23699 52.74993    50.60570
##      Yes 68.80259     62.90327 62.23297    62.56862

Consider specifically the trend line from Boomers to Millennials between religion = No vs. religion = Yes.

Here’s a different way to visualize:

cat_plot(model6, pred = gen, modx = religion, jnplot = TRUE)

We could also do cohort and polID:

model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)

Or religious and ID:

model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

What do you note, generally? Which one would you say is most indicative of an interaction effect?

Homework 5 (5%): due by next lecture on March 26th

Instructions: Start a new R markdown for the homework and call it “Yourlastname_Firstname_Homework_5”.

Copy everything below from Task 1 to Task 5. Keep the task prompt and questions, and provide your code and answer underneath.

Remember: you need all the steps for your code to work, including loading your data – otherwise it will not knit.

To generate a new code box, click on the +C sign above. Underneath your code, provide your answer to the task question.

When you are done, click on “Knit” above, then “Knit to Html”. Wait for everything to compile. If you get an error like “Execution halted”, it means there are issues with your code you must fix. When all issues are fixed, it will prompt a new window. Then click on “Publish” in the top right, and then Rpubs (the first option) and follow the instructions to create your Rpubs account and get your Rpubs link for your document (i.e., html link as I provide for the tutorial).

Note: Make sure to provide both your markdown file and R pubs link. If you do not submit both, you will be penalized 2 pts. out of the 5 pts. total.

Task 1

Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## [[1]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[2]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[12]]
##  [1] "fstcore"      "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"

hungary_data <- read_fst("hungary_data.fst")
df <- hungary_data
df$weight <- df$dweight * df$pweight
survey_design <- svydesign(ids = ~1, data = df, weights = ~weight)


df$trstplt <- ifelse(df$trstplt > 10, NA, df$trstplt)
df$trstprl <- ifelse(df$trstprl > 10, NA, df$trstprl)
df$trstprt <- ifelse(df$trstprt > 10, NA, df$trstprt)


df$trust <- scales::rescale(df$trstplt + df$trstprl + df$trstprt, na.rm = TRUE, to = c(0, 100))
df$populist <- scales::rescale(df$trust, na.rm = TRUE, to=c(100,0))

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
   
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3367         5294         4240         3245

df <- df %>%
  mutate(
    
    Capital = case_when(
      ppltrst %in% c(77, 88, 99) ~ NA_character_,
      ppltrst >= 0 & ppltrst <= 3 ~ "Low",
      ppltrst >= 4 & ppltrst <= 6 ~ "Mid",
      ppltrst >= 7 & ppltrst <= 10 ~ "High",
      TRUE ~ as.character(ppltrst)
    ),
    
    Politically = case_when(
      polintr %in% c(1, 2) ~ "Interested",
      polintr %in% c(3, 4) ~ "Not Interested",
      polintr %in% c(7, 8, 9) ~ NA_character_
    )
  )

model1 <- lm(populist ~ gen, data = df, weights = weight)
model2 <- lm(populist ~ Capital + Politically, data = df, weights = weight)
modelsummary(
  list(model1, model2), 
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}", 
                  "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	65.7 (0.5)***	50.4 (0.5)***
genBaby Boomers	0.7 (0.6)
genGen X	0.3 (0.6)
genMillennials	0.1 (0.6)
CapitalLow		17.5 (0.5)***
CapitalMid		9.8 (0.5)***
PoliticallyNot Interested		7.9 (0.4)***
Num.Obs.	13853	14218
R2	0.000	0.109
R2 Adj.	0.000	0.109
AIC	128234.7	129932.1
BIC	128272.4	129969.9
Log.Lik.	−64112.367	−64961.031
RMSE	23.08	21.79

when observing Hungarian data (used in place of German data which failed to be readable)The r squared value of 0,1 is an indicator of a weak correlation between variables (where the strength of the relationship is dependent on how close the r statistic is to 1). The highest and strongest positive correlation lies with those with in the lowcapital group which have the highest slope (being largest change in y when x increases).








## Task 2

Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.



```r
hungary_data <- read_fst("hungary_data.fst")
df <- hungary_data

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
  df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))


df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)

modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	69.4 (1.0)***	66.0 (1.5)***
polIDModerate	0.5 (1.1)	5.2 (1.7)**
polIDRight	3.6 (1.1)**	7.0 (1.8)***
religious	0.5 (0.1)***	1.5 (0.3)***
polIDModerate × religious		−1.3 (0.4)***
polIDRight × religious		−0.9 (0.4)*
Num.Obs.	1247	1247
R2	0.021	0.031
R2 Adj.	0.019	0.027
AIC	10200.4	10191.9
BIC	10226.0	10227.8
Log.Lik.	−5095.203	−5088.932
RMSE	14.24	14.19

modelsummary(
  list(model3, model4),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)

	(1)	(2)
(Intercept)	69.4 (1.0)***	66.0 (1.5)***
polIDModerate	0.5 (1.1)	5.2 (1.7)**
polIDRight	3.6 (1.1)**	7.0 (1.8)***
religious	0.5 (0.1)***	1.5 (0.3)***
polIDModerate × religious		−1.3 (0.4)***
polIDRight × religious		−0.9 (0.4)*
Num.Obs.	1247	1247
R2	0.021	0.031
R2 Adj.	0.019	0.027
AIC	10200.4	10191.9
BIC	10226.0	10227.8
Log.Lik.	−5095.203	−5088.932
RMSE	14.24	14.19

In this summary for the varible of being politacally involved( graphed against the extent to which an indnvidual is religious) for the nation of hungary (as germanys data failed to be accessible) the coeffcient for those within the (1) group lies at a value of 0.5 which is consderably small when compared to the coefficient of those within (2) which have their coefficent at 5.2. This means that on average points/ indnviduals within data set (1) will demonstrate a tendency to increase by incrimiemnts of 0.5 (to a lesser degree) than data set (2). the R squared value that is observed in group (1) iss smaller than that of group (2) despite having an equal number of entries. the R squared figure represents the strenghth of the relationship/correlation between the independant varible (I.V) and the dependant varible (D.V) . depsite the fact that both groups demonstrate a weake correlation (given that R2 is close to 0) relative to data set (1), data set (2) has a stronger observable correlation between vairbles at 0.031. this indncates a 3.1% strenghth relationship. the relationship between polical ideation and religiouys ideation demonstrates a negative correlationw with the coeffecients being negative ( idicating a downward slope)

Task 3

Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.

df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))
  
  df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3102         4970         3995         3074

df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )
  
  df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Religiousness",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  polID*religious effect
##           religious
## polID             0        2        5        8       10
##   Left     65.95423 68.86099 73.22113 77.58128 80.48804
##   Moderate 71.19891 71.52314 72.00948 72.49582 72.82005
##   Right    72.94878 74.01124 75.60494 77.19863 78.26110

table(df$religion)

## 
##   No  Yes 
## 6672 8752

df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  3356  5316

model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)

modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")

	(1)	(2)
religionYes	1.8 (1.0)+	3.3 (3.2)
IDRight	4.3 (1.0)***	4.2 (1.0)***
genBaby Boomers	−2.8 (1.5)+	−2.2 (3.1)
genGen X	−5.9 (1.6)***	−2.3 (3.1)
genMillennials	−6.0 (1.7)***	−6.6 (3.2)*
religionYes × genBaby Boomers		−0.4 (3.5)
religionYes × genGen X		−6.8 (3.7)+
religionYes × genMillennials		2.7 (3.8)
Num.Obs.	783	783
R2	0.045	0.060
R2 Adj.	0.039	0.051
AIC	6385.0	6378.0
BIC	6417.7	6424.7
Log.Lik.	−3185.520	−3179.020
RMSE	13.96	13.93



## Task 4

Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.



```r
belgium_data <- read_fst("belgium_data.fst")
df <- belgium_data


df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3135         5541         4268         3822

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))


df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

table(df$religion)

## 
##   No  Yes 
## 9665 7478

df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  5117  5359

model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)






interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  70.73603     64.96512 60.86166    60.64405
##      Yes 74.82832     71.04569 71.50128    71.08290

modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")

	(1)	(2)
religionYes	8.0 (0.9)***	4.1 (2.4)+
IDRight	1.3 (0.9)	1.4 (0.9)
genBaby Boomers	−4.1 (1.4)**	−5.8 (2.1)**
genGen X	−6.6 (1.5)***	−9.9 (2.1)***
genMillennials	−7.0 (1.5)***	−10.1 (2.1)***
religionYes × genBaby Boomers		2.0 (2.8)
religionYes × genGen X		6.5 (3.0)*
religionYes × genMillennials		6.3 (3.0)*
Num.Obs.	1058	1058
R2	0.109	0.116
R2 Adj.	0.105	0.109
AIC	8674.7	8672.6
BIC	8709.4	8722.2
Log.Lik.	−4330.349	−4326.276
RMSE	14.36	14.31

interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  70.73603     64.96512 60.86166    60.64405
##      Yes 74.82832     71.04569 71.50128    71.08290

cat_plot(model6, pred = gen, modx = religion, jnplot = TRUE)

Based on the information above it can be said that with time as generations progress there are noticeable differences across the different cohorts,where notable there a progressive decrease in authoritarian values both for individuals who align themselves heavily with religion and those that do not practice. This is describing the interaction effect between the 2 variables, where when 2 variables work in conjunction there’s a larger effect exerted on the output. being religious and of a specific cohort will have a combined interaction effect on the extent to which an individual will endorse authoritarian values, with younger individuals demonstrating the tendency to hold less authoritarian values provided they are not religiously inclined.

Task 5

Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.

belgium_data <- read_fst("belgium_data.fst")
df <- belgium_data


df <- df %>%
  mutate(
    polID = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),
   religious = case_when(
      rlgdgr %in% c(77, 88, 99) ~ NA_real_,
      TRUE ~ rlgdgr
    )
  )

df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

df <- df %>%
  mutate(
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(yrbrn)  
    ),
    # After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
    # Factors are used in R to handle categorical variables.
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  )
table(df$gen)

## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3135         5541         4268         3822

df <- df %>%
  mutate(behave = ipbhprp,
         secure = impsafe,
         safety = ipstrgv,
         tradition = imptrad,
         rules = ipfrule) %>%
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
                ~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
  # Apply the reverse coding
  mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))


df$auth <- scales::rescale(df$behave + 
                      df$secure + 
                      df$safety + 
                      df$tradition + 
                      df$rules, to=c(0,100), na.rm=TRUE)


df <- df %>% filter(!is.na(auth))

table(df$religion)

## 
##   No  Yes 
## 9665 7478

df <- df %>%
  mutate(ID = case_when(
    lrscale >= 0 & lrscale <= 4 ~ "Left",
    lrscale >= 6 & lrscale <= 10 ~ "Right",
    lrscale > 10 ~ NA_character_,  # Set values above 10 as NA
    TRUE ~ NA_character_  # Ensure value 5 and any other unexpected values are set as NA
  ))
table(df$ID)

## 
##  Left Right 
##  5117  5359

model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)






interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  70.73603     64.96512 60.86166    60.64405
##      Yes 74.82832     71.04569 71.50128    71.08290

modelsummary(
  list(model5, model6),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")

	(1)	(2)
religionYes	8.0 (0.9)***	4.1 (2.4)+
IDRight	1.3 (0.9)	1.4 (0.9)
genBaby Boomers	−4.1 (1.4)**	−5.8 (2.1)**
genGen X	−6.6 (1.5)***	−9.9 (2.1)***
genMillennials	−7.0 (1.5)***	−10.1 (2.1)***
religionYes × genBaby Boomers		2.0 (2.8)
religionYes × genGen X		6.5 (3.0)*
religionYes × genMillennials		6.3 (3.0)*
Num.Obs.	1058	1058
R2	0.109	0.116
R2 Adj.	0.105	0.109
AIC	8674.7	8672.6
BIC	8709.4	8722.2
Log.Lik.	−4330.349	−4326.276
RMSE	14.36	14.31

interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Generations",
     ylab="Authoritarian attitudes scale")

interaction_plot

## 
##  religion*gen effect
##         gen
## religion Interwar Baby Boomers    Gen X Millennials
##      No  70.73603     64.96512 60.86166    60.64405
##      Yes 74.82832     71.04569 71.50128    71.08290

df <- df %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ))

# check
table(df$religion)

## 
##   No  Yes 
## 9665 7478

cat_plot(model6, pred = gen, modx = religion, jnplot = TRUE)

model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)

There seems to be a positive correlation being sustained between the degree to which an individual self reports as religious and the authoritarian values they endorse and internalize, where there’s a progressive increase in more authoritarian values the more religious they are as a person. As religiosity increases so does the endorsement of authoritarian values which could lead researchers to infer that it can be expected that an individual who is religious will hold authoritarian values. because a linear relationship was established one can conclude that the increases are consistent and progressive.