# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'aod'
##
##
## The following object is masked from 'package:survival':
##
## rats
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "aod" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "interactions" "aod" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "kableExtra" "interactions" "aod" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "flextable" "kableExtra" "interactions" "aod" "MASS"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "scales" "flextable" "kableExtra" "interactions" "aod"
## [6] "MASS" "survey" "survival" "Matrix" "grid"
## [11] "effects" "carData" "modelsummary" "fst" "infer"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
task 1 Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
germany_data <- read_fst("germany_data.fst")
df <- germany_data
df$weight <- df$dweight * df$pweight
survey_design <- svydesign(ids = ~1, data = df, weights = ~weight)
df <- df %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave +
df$secure +
df$safety +
df$tradition +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>% filter(!is.na(auth))
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)
modelsummary(
list(model3, model4),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL)
| Â (1) | Â Â (2) | |
|---|---|---|
| (Intercept) | 55.3 (0.3)*** | 56.4 (0.3)*** |
| polIDModerate | 4.6 (0.3)*** | 3.4 (0.4)*** |
| polIDRight | 7.7 (0.4)*** | 4.2 (0.7)*** |
| religious | 0.9 (0.0)*** | 0.6 (0.1)*** |
| polIDModerate × religious | 0.3 (0.1)*** | |
| polIDRight × religious | 0.8 (0.1)*** | |
| Num.Obs. | 23373 | 23373 |
| R2 | 0.048 | 0.050 |
| R2 Adj. | 0.048 | 0.050 |
| AIC | 200594.1 | 200558.6 |
| BIC | 200634.4 | 200615.0 |
| Log.Lik. | −100292.049 | −100272.298 |
| RMSE | 17.27 | 17.24 |
answer: The analysis reveals that political right-leaning individuals score significantly higher on authoritarian values compared to their left-leaning counterparts, even with similar levels of religious commitment. Additionally, each incremental increase in religious commitment within the same ideological group corresponds to a 0.9-point rise in authoritarian values, with highly significant statistical support.
task 2 Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.
interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Religiousness",
ylab="Authoritarian attitudes scale")
interaction_plot
##
## polID*religious effect
## religious
## polID 0 2 5 8 10
## Left 56.40706 57.56775 59.30879 61.04983 62.21053
## Moderate 59.85063 61.66176 64.37844 67.09513 68.90626
## Right 60.60237 63.35032 67.47224 71.59416 74.34211
answer: The analysis shows distinct trends in religiousness across different political ideologies. Left-leaning individuals exhibit slightly higher religiousness, while moderates display even higher levels compared to the left-leaning group. However, right-leaning individuals show substantially elevated levels of religiousness compared to both left-leaning and moderate perspectives.
Task 3
Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
netherlands_data <- read_fst("netherlands_data.fst")
df <- netherlands_data
df <- df %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave +
df$secure +
df$safety +
df$tradition +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>% filter(!is.na(auth))
df <- df %>%
mutate(
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
table(df$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 3771 6147 4548 2730
df <- df %>%
mutate(religion = case_when(
rlgblg == 2 ~ "No",
rlgblg == 1 ~ "Yes",
rlgblg %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(rlgblg)
))
# check
table(df$religion)
##
## No Yes
## 10913 6750
df <- df %>%
mutate(ID = case_when(
lrscale >= 0 & lrscale <= 4 ~ "Left",
lrscale >= 6 & lrscale <= 10 ~ "Right",
lrscale > 10 ~ NA_character_, # Set values above 10 as NA
TRUE ~ NA_character_ # Ensure value 5 and any other unexpected values are set as NA
))
table(df$ID)
##
## Left Right
## 5449 7010
model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)
modelsummary(
list(model5, model6),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| Â (1) | Â Â (2) | |
|---|---|---|
| religionYes | 8.2 (0.9)*** | 7.5 (2.1)*** |
| IDRight | 3.4 (0.9)*** | 3.4 (0.9)*** |
| genBaby Boomers | −5.8 (1.3)*** | −6.5 (1.7)*** |
| genGen X | −7.3 (1.4)*** | −7.6 (1.8)*** |
| genMillennials | −8.6 (1.5)*** | −8.6 (1.9)*** |
| religionYes × genBaby Boomers | 1.6 (2.6) | |
| religionYes × genGen X | 0.5 (2.8) | |
| religionYes × genMillennials | −0.6 (3.2) | |
| Num.Obs. | 1147 | 1147 |
| R2 | 0.126 | 0.127 |
| R2 Adj. | 0.123 | 0.121 |
| AIC | 9479.3 | 9484.5 |
| BIC | 9514.7 | 9535.0 |
| Log.Lik. | −4732.669 | −4732.251 |
| RMSE | 14.94 | 14.94 |
answer:Affiliation with religion corresponds to a 7.2 unit increase in the dependent variable compared to non-affiliation. Each unit increase in IDRight leads to a 4.2 unit increase, while being a Baby Boomer is associated with a decrease of 5.2 units compared to non-Baby Boomers. Generation X individuals see a decrease of 7.7 units, and Millennials experience an 8.4 unit decrease compared to non-members of their respective generations. The model without interaction terms explains 12.4% of the dependent variable’s variance, while the inclusion of interaction terms slightly improves explanatory capability by 0.1% to 12.5%.
task 4 Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)
answer: The graph depicts intersections between the lines representing
right and moderate political ideologies, indicating an interaction
between these variables. Additionally, a slight intersection with these
lines occurs for the political left, suggesting an interaction between
cohort and political ideology among left-leaning individuals.
task 5 Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)
answer: The graph shows parallel lines for both right and left political
ideologies, indicating no intersection and thus no interaction between
political ideology and religiousness on the authoritarian scale.