# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "interactions","tidymodels") # add any you need here
# Installing new packages
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
##
## ── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──
##
## ✔ broom 1.0.5 ✔ rsample 1.2.0
## ✔ dials 1.2.1 ✔ tune 1.1.2
## ✔ modeldata 1.3.0 ✔ workflows 1.1.4
## ✔ parsnip 1.2.0 ✔ workflowsets 1.0.1
## ✔ recipes 1.0.10 ✔ yardstick 1.3.0
##
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ scales::discard() masks purrr::discard()
## ✖ Matrix::expand() masks tidyr::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ recipes::fixed() masks stringr::fixed()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Matrix::pack() masks tidyr::pack()
## ✖ yardstick::spec() masks readr::spec()
## ✖ recipes::step() masks stats::step()
## ✖ Matrix::unpack() masks tidyr::unpack()
## ✖ recipes::update() masks Matrix::update(), stats::update()
## • Use suppressPackageStartupMessages() to eliminate package startup messages
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "interactions" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "yardstick" "workflowsets" "workflows" "tune" "rsample"
## [6] "recipes" "parsnip" "modeldata" "dials" "scales"
## [11] "broom" "tidymodels" "interactions" "survey" "survival"
## [16] "Matrix" "grid" "effects" "carData" "modelsummary"
## [21] "fst" "infer" "lubridate" "forcats" "stringr"
## [26] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [31] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [36] "utils" "datasets" "methods" "base"
germany_data <- read_fst("germany_data.fst")
Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
germany_data <- read_fst("germany_data.fst")
df <- germany_data
df <- df %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
df$auth <- scales::rescale(df$behave +
df$secure +
df$safety +
df$tradition +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
df <- df %>% filter(!is.na(auth))
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)
modelsummary(
list(model3, model4),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL)
| Â (1) | Â Â (2) | |
|---|---|---|
| (Intercept) | 57.5 (0.7)*** | 59.6 (0.9)*** |
| polIDModerate | 5.3 (0.7)*** | 2.6 (1.1)* |
| polIDRight | 9.3 (1.1)*** | 3.4 (1.9)+ |
| religious | 0.3 (0.1)** | −0.3 (0.2)+ |
| polIDModerate × religious | 0.8 (0.2)** | |
| polIDRight × religious | 1.5 (0.4)*** | |
| Num.Obs. | 2855 | 2855 |
| R2 | 0.035 | 0.041 |
| R2 Adj. | 0.034 | 0.039 |
| AIC | 24680.0 | 24665.5 |
| BIC | 24709.7 | 24707.2 |
| Log.Lik. | −12334.978 | −12325.764 |
| RMSE | 17.13 | 17.13 |
The PollDModerate value of 5.3 means that those on the right who identify as politically moderate are 5.3 units higher than those on the left which is the reference category. PollDRight indicates that people who identify as being politically right are 9.3 units higher than people who identify as politically left on average. The value of 0.3 as the religious predictor means that on the authoritarian value scale, every full unit increase in religiousness leads to a 0.3 value increase in the associated expected average. The r sqaured value is also less than 1 incidcating that the model does not have an ideal predictor. The stars next to the values indicate that the p-value is less than 0.001 so the null hypothesis is incorrectly rejected.
Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.
germany_data <- read_fst("germany_data.fst")
interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Religiousness",
ylab="Authoritarian attitudes scale")
interaction_plot
##
## polID*religious effect
## religious
## polID 0 2 5 8 10
## Left 59.61010 58.93398 57.91980 56.90562 56.22950
## Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
## Right 63.00456 65.26702 68.66072 72.05442 74.31689
The PolID values decrease as the religious values increase which means that according to the Authoritarian Attitudes scale, those who identify as politically left are also less religious. The upward slope for those who identify as politically moderate and right shows us that these individuals are more religious. Those who are politically right are more religious than those who are politically moderate. We can come to this conclusion because the upward slope of politically right is steeper than the slope of those that are politically moderate.
Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
netherlands_data <- read_fst("netherlands_data.fst")
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
df <- df %>%
mutate(
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
# Recoding generational cohorts based on the year of birth (yrbrn).
# The year of birth is categorized into different generational cohorts.
# Interwar (1900-1945), Baby Boomers (1946-1964), Gen X (1965-1979), Millennials (1980-1996).
# The 'TRUE' line is a catch-all that keeps the original year of birth for those not in these ranges.
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
# After recoding, the gen variable is converted into a factor with labels for clearer interpretation.
# Factors are used in R to handle categorical variables.
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
table(df$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 5360 8583 5601 4652
df <- df %>%
mutate(religion = case_when(
rlgblg == 2 ~ "No",
rlgblg == 1 ~ "Yes",
rlgblg %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(rlgblg)
))
# check
table(df$religion)
##
## No Yes
## 11088 13750
df <- df %>%
mutate(ID = case_when(
lrscale >= 0 & lrscale <= 4 ~ "Left",
lrscale >= 6 & lrscale <= 10 ~ "Right",
lrscale > 10 ~ NA_character_, # Set values above 10 as NA
TRUE ~ NA_character_ # Ensure value 5 and any other unexpected values are set as NA
))
table(df$ID)
##
## Left Right
## 9745 4882
model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)
modelsummary(
list(model5, model6),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| Â (1) | Â Â (2) | |
|---|---|---|
| religionYes | 1.3 (0.8) | −0.9 (2.0) |
| IDRight | 7.1 (0.9)*** | 7.3 (0.9)*** |
| genBaby Boomers | −7.6 (1.1)*** | −8.2 (1.8)*** |
| genGen X | −9.7 (1.3)*** | −11.5 (1.9)*** |
| genMillennials | −12.7 (1.3)*** | −15.7 (2.0)*** |
| religionYes × genBaby Boomers | 0.8 (2.3) | |
| religionYes × genGen X | 3.2 (2.5) | |
| religionYes × genMillennials | 5.8 (2.7)* | |
| Num.Obs. | 1761 | 1761 |
| R2 | 0.103 | 0.106 |
| R2 Adj. | 0.100 | 0.102 |
| AIC | 15122.2 | 15121.7 |
| BIC | 15160.5 | 15176.5 |
| Log.Lik. | −7554.112 | −7550.862 |
| RMSE | 16.76 | 16.76 |
interaction_plot <- effect("religion*gen", model6, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Generations",
ylab="Authoritarian attitudes scale")
interaction_plot
##
## religion*gen effect
## gen
## religion Interwar Baby Boomers Gen X Millennials
## No 70.47065 62.23112 58.99075 54.73118
## Yes 69.61993 62.20566 61.30541 59.63364
cat_plot(model5, pred = gen, modx = religion, jnplot = TRUE)
## Warning: gen and religion are not included in an interaction with one another in the
## model.
cat_plot(model6, pred = gen, modx = religion, jnplot = TRUE)
The 1.3 religionYes value in model 5 shows that people who identify as
religious are 1.3 units higher than average. The IDRight value of 7.1
shows that those who are politically right are 7.1 units higher than the
average and are also more religious. The R-squared value is 0.100,
therefore this model does not describe a variation in the data. once
again, the stars mean the p-value is less than 0.001 so the null
hypothesis is incorrectly rejected
The -0.9 religionYes value in model 6 shows that people who identity as religious are 0.9 units below the average. The IDRight value of 7.3 shows that those who are politically right are 7.3 units higher than the average. The adjusted R-squared value is 0.102 so the model does not account most of the variation in data. Th p-value suggests that the data is not statistically significant.
Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
netherlands_data <- read_fst("netherlands_data.fst")
interaction_plot
##
## religion*gen effect
## gen
## religion Interwar Baby Boomers Gen X Millennials
## No 70.47065 62.23112 58.99075 54.73118
## Yes 69.61993 62.20566 61.30541 59.63364
model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)
None of the lines cross each other, meaning there is no interaction
between the three.
Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interaction_plot
##
## religion*gen effect
## gen
## religion Interwar Baby Boomers Gen X Millennials
## No 70.47065 62.23112 58.99075 54.73118
## Yes 69.61993 62.20566 61.30541 59.63364
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)
There is a relationship between idenfying as politically left/right and
religion because the lines are not parallel. Those who identify as
politically right are more religious than those who identify as
politically left.