rm(list=ls()); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 529382 28.3 1177811 63 NA 669417 35.8
## Vcells 975796 7.5 8388608 64 16384 1851693 14.2
# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Version 2.0.0 of `modelsummary`, to be released soon, will introduce a
## breaking change: The default table-drawing package will be `tinytable`
## instead of `kableExtra`. All currently supported table-drawing packages
## will continue to be supported for the foreseeable future, including
## `kableExtra`, `gt`, `huxtable`, `flextable, and `DT`.
##
## You can always call the `config_modelsummary()` function to change the
## default table-drawing package in persistent fashion. To try `tinytable`
## now:
##
## config_modelsummary(factory_default = 'tinytable')
##
## To set the default back to `kableExtra`:
##
## config_modelsummary(factory_default = 'kableExtra')
##
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'aod'
##
##
## The following object is masked from 'package:survival':
##
## rats
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "aod" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "interactions" "aod" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "kableExtra" "interactions" "aod" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "flextable" "kableExtra" "interactions" "aod" "MASS"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "scales" "flextable" "kableExtra" "interactions" "aod"
## [6] "MASS" "survey" "survival" "Matrix" "grid"
## [11] "effects" "carData" "modelsummary" "fst" "infer"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
Use data for Germany and the variable coding for model3 and model4 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
germany_data <- read.fst("germany_data.fst")
df <- germany_data
df <- df %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
# Apply the reverse coding
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
# Now you can calculate 'schwartzauth' after the NA recoding
df$auth <- scales::rescale(df$behave +
df$secure +
df$safety +
df$tradition +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>% filter(!is.na(auth))
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
model3 <- lm(auth ~ polID + religious, data = df, weights = weight)
model4 <- lm(auth ~ polID + religious + polID*religious, data = df, weights = weight)
modelsummary(
list(model3, model4),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL)
| Â (1) | Â Â (2) | |
|---|---|---|
| (Intercept) | 57.5 (0.7)*** | 59.6 (0.9)*** |
| polIDModerate | 5.3 (0.7)*** | 2.6 (1.1)* |
| polIDRight | 9.3 (1.1)*** | 3.4 (1.9)+ |
| religious | 0.3 (0.1)** | −0.3 (0.2)+ |
| polIDModerate × religious | 0.8 (0.2)** | |
| polIDRight × religious | 1.5 (0.4)*** | |
| Num.Obs. | 2855 | 2855 |
| R2 | 0.035 | 0.041 |
| R2 Adj. | 0.034 | 0.039 |
| AIC | 24680.0 | 24665.5 |
| BIC | 24709.7 | 24707.2 |
| Log.Lik. | −12334.978 | −12325.764 |
| RMSE | 17.13 | 17.13 |
INTERPRETATION: when looking at the data we can see that there is a relationship between the variables of political orientation, religiousness, and authoritarian values. That said, the output generates polmoderate of 5.3 this means that people who identify as politically moderate are 5.3 higher than those who are on the left. Additionally, those who identify as polright are 9.3 higher than those who are left. The number 0.3 represents that if an individual becomes more religious by one unit, this means their authoritarian values will increase by 0.3 on average. Moreover, if we look at the p-values which are smaller than 0.001 this means that there is a relationship between these variables highly unlikely to have occured by chance. That said, this means that the variables political orientation and religiousness affect authoritiarian values. lastly, the adjusted R-square of 0.034 means that these variables only explain a small portion of of the variation seen in authoritarian values among individuals, meaning other factors play a role as well.
Now generate the model4 interaction plot that we did in the tutorial, but again using the German data instead of the French. Interpret.
germany_data <- read_fst("germany_data.fst")
interaction_plot <- effect("polID*religious", model4, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Religiousness",
ylab="Authoritarian attitudes scale")
interaction_plot
##
## polID*religious effect
## religious
## polID 0 2 5 8 10
## Left 59.61010 58.93398 57.91980 56.90562 56.22950
## Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
## Right 63.00456 65.26702 68.66072 72.05442 74.31689
INTERPRETATION: PolID left shows that people who identify as left are less religious. people who identify as a little more religious has a more upwards slope. PolID right shows a really high upwards slope shows that people who identify as politically right have stronger religious beliefs based on the authoritarian scale.
Use data for the Netherlands and the variable coding for model5 and model6 from the tutorial (i.e., both for the cleaning & recode, as well as the linear model formulas). Do a modelsummary table displaying both model outputs. Interpret the coefficients for the MLR without the interaction (i.e., the first displayed model), and interpret model fit metrics for both models.
netherlands_data <- read_fst("netherlands_data.fst")
df <- netherlands_data
df <- df %>%
mutate(religion = case_when(
rlgblg == 2 ~ "No",
rlgblg == 1 ~ "Yes",
rlgblg %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(rlgblg)
))
table(df$religion)
##
## No Yes
## 11289 7008
df <- df %>%
mutate(ID = case_when(
lrscale >= 0 & lrscale <= 4 ~ "Left",
lrscale >= 6 & lrscale <= 10 ~ "Right",
lrscale > 10 ~ NA_character_, # Set values above 10 as NA
TRUE ~ NA_character_ # Ensure value 5 and any other unexpected values are set as NA
))
table(df$ID)
##
## Left Right
## 5605 7247
df <- df %>%
mutate(
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(yrbrn)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")),
)
table(df$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 3967 6359 4684 2803
df <- df %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
df$auth <- scales::rescale(df$behave +
df$secure +
df$safety +
df$tradition +
df$rules, to=c(0,100), na.rm=TRUE)
df <- df %>% filter(!is.na(auth))
df <- df %>%
mutate(
polID = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
model5 <- lm(auth ~ religion + ID + gen, data = df, weights = weight)
model6 <- lm(auth ~ religion + ID + gen + religion*gen, data = df, weights = weight)
modelsummary(
list(model5, model6),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| Â (1) | Â Â (2) | |
|---|---|---|
| religionYes | 8.2 (0.9)*** | 7.5 (2.1)*** |
| IDRight | 3.4 (0.9)*** | 3.4 (0.9)*** |
| genBaby Boomers | −5.8 (1.3)*** | −6.5 (1.7)*** |
| genGen X | −7.3 (1.4)*** | −7.6 (1.8)*** |
| genMillennials | −8.6 (1.5)*** | −8.6 (1.9)*** |
| religionYes × genBaby Boomers | 1.6 (2.6) | |
| religionYes × genGen X | 0.5 (2.8) | |
| religionYes × genMillennials | −0.6 (3.2) | |
| Num.Obs. | 1147 | 1147 |
| R2 | 0.126 | 0.127 |
| R2 Adj. | 0.123 | 0.121 |
| AIC | 9479.3 | 9484.5 |
| BIC | 9514.7 | 9535.0 |
| Log.Lik. | −4732.669 | −4732.251 |
| RMSE | 14.94 | 14.94 |
cat_plot(model5, pred = gen, modx = religion, jnplot = TRUE)
## Warning: gen and religion are not included in an interaction with one another in the
## model.
cat_plot(model6, pred = gen, modx = religion, jnplot = TRUE)
INTERPRETATION:
when looking at model 5 the data ‘religionyes’ of 8.2 means that people who identify as religious tend to have an outcomes 8.2 units higher than average. Additionally, IDright has a number of 3.4 which means that those who have identify as politically right tend to be 3.4 units higher than average meaning they are more religious. Next, the negative coefficients for each generation represent that the babyboomers tend to score 5.8 units lower, gen X scores 7.3 units lower, and millienials score 8.6 units lower compared to authoritarian scale. The *** next to each coefficient for variables like genBaby Boomers, genGen X, and genMillennials means the p-value is less than 0.001 which means the variables do not significantly affect the outcome we’re studying with a very high level of confidence. The adjusted R square of 0.123 means that the data only accounts for a small number of the variation of the data, implying that other factors may affect.
when we look at model 6 the data ‘religion yes’ has a score of 7.5 which means people who identify as religious are 7.5 units above the average. Additionally, IDright tells us that people who identify as politically right are 3.4 units above the average meaning they are more religious.Next, the negative coefficients for each generation represent that the baby boomers tend to score 6.5 units lower, the gen X scores 7.6 units lower, and millenials score 8.6 units lower than the average to the authoritarian scale. Moreover, the adjusted r square of 0.121 means that the data only accounts for a small number of the variation of the data, implying that other factors may affect. Lastly, The *** on each coefficient shows that the p-value is 0.001, meaning they are not statistically significant.
Overall,both models show that those on the political right score significantly above the average which then shows a strong link between right-wing and authoritarian values.
Produce the model7 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
netherlands_data <- read_fst("netherlands_data.fst")
interaction_plot
##
## polID*religious effect
## religious
## polID 0 2 5 8 10
## Left 59.61010 58.93398 57.91980 56.90562 56.22950
## Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
## Right 63.00456 65.26702 68.66072 72.05442 74.31689
model7 <- lm(auth ~ + cohort + polID + cohort*polID, data = df, weights = weight)
interact_plot(model7, pred = cohort, modx = polID, jnplot = TRUE)
INTERPRETATION: There is converging which means that interaction occurs between the variables across the models.
Produce the model8 interaction plot from the tutorial, but again using the Netherlands data. Interpret.
model8 <- lm(auth ~ religious + ID + religious*ID, data = df, weights = weight)
interaction_plot
##
## polID*religious effect
## religious
## polID 0 2 5 8 10
## Left 59.61010 58.93398 57.91980 56.90562 56.22950
## Moderate 62.21299 63.14317 64.53843 65.93370 66.86388
## Right 63.00456 65.26702 68.66072 72.05442 74.31689
interact_plot(model8, pred = religious, modx = ID, jnplot = TRUE)
INTERPRETATION: The graph shows parallel lines which means there is a
correlation between left and right, authoritarianism, and religiousness.
Additionally, the higher the religiousness, the higher the more those
individuals have stronger authoritarian values. And again, the graph
shows that those who are right winged have higher religious and
authoritarian values.