# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "interactions") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'infer' was built under R version 4.3.3
## Warning: package 'effects' was built under R version 4.3.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.3.3
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## Warning: package 'survey' was built under R version 4.3.3
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
## Warning: package 'interactions' was built under R version 4.3.3
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "interactions" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
Graph the histogram distribution using the populist scale (as we did in the tutorial) for the country of Sweden. What do you note?
sweden_data <- read.fst("sweden_data.fst")
# Setting values greater than 10 to NA
sweden_data$trstplt <- ifelse(sweden_data$trstplt > 10, NA, sweden_data$trstplt)
sweden_data$trstprl <- ifelse(sweden_data$trstprl > 10, NA, sweden_data$trstprl)
sweden_data$trstprt <- ifelse(sweden_data$trstprt > 10, NA, sweden_data$trstprt)
# Creating and rescaling the trust variable
sweden_data$trust <- scales::rescale(sweden_data$trstplt + sweden_data$trstprl + sweden_data$trstprt, na.rm = TRUE, to = c(0, 100))
# Here's how you would "flip it" because populist is conceived by N + I as 'anti-trust'
# As Schafer explains there are some issues with that conceptualization, but N + I is also widely applied
sweden_data$populist <- scales::rescale(sweden_data$trust, na.rm = TRUE, to=c(100,0))
ggplot(sweden_data, aes(x = populist)) +
geom_histogram(bins = 30, fill = "blue", color = "black") +
theme_minimal() +
labs(title = "Distribution of Populist Scale for Sweden",
x = "Populist Scale",
y = "Count")
## Warning: Removed 2503 rows containing non-finite values (`stat_bin()`).
Based on the above histograph it demonstrates that the right side is
skewed indicating that higher levels of populism versus lower levels of
populism. Mode of this data appears to be 50.
Run a linear regression with populist attitudes as the outcome, educational attainment (recoded as a binary “BA or more” and “No BA”), and using the Swedish data. Print the intercept and coefficients only and interpret. Then do a tidy model table displaying the p-value (with digits = 3) and interpret.
sweden_data <- sweden_data %>%
mutate(
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA"
),
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more"))
)
sweden_pop_model <- lm(populist ~ educ.ba, data = sweden_data)
coefficients_sweden <- coef(sweden_pop_model)
print(coefficients_sweden)
## (Intercept) educ.baBA or more
## 51.358680 -7.868121
broom::tidy
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
## function (x, ...)
## {
## UseMethod("tidy")
## }
## <bytecode: 0x000002280ebfb000>
## <environment: namespace:generics>
tidy (sweden_pop_model)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 51.4 0.181 283. 0
## 2 educ.baBA or more -7.87 0.350 -22.5 2.58e-110
tidy(sweden_pop_model) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 51.359 | 0.181 | 283.363 | 0 |
| educ.baBA or more | -7.868 | 0.350 | -22.494 | 0 |
summary(sweden_pop_model)
##
## Call:
## lm(formula = populist ~ educ.ba, data = sweden_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.359 -14.692 -1.359 11.975 56.509
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.3587 0.1812 283.36 <2e-16 ***
## educ.baBA or more -7.8681 0.3498 -22.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.43 on 15711 degrees of freedom
## (2503 observations deleted due to missingness)
## Multiple R-squared: 0.0312, Adjusted R-squared: 0.03114
## F-statistic: 506 on 1 and 15711 DF, p-value: < 2.2e-16
Based on the displayed data it depicts that the intercept is 51.38 and the co-efficent is -7.8. This indicates that the intercept has a poplist scale of zero while the co-effeicent suggests that those with BA or more with likely experience a decrease over time. Additionally the p-value is less than one demonstrating that th ehypothesis is null.
Now using the Italian dataset and the authoritarian values scale (as we did in the tutorial), graph the average by survey year. Interpret.
italy_data <- read.fst("italy_data.fst")
italy_data <- italy_data %>%
mutate(behave = ipbhprp,
secure = impsafe,
safety = ipstrgv,
tradition = imptrad,
rules = ipfrule) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"),
~ na_if(.x, 7) %>% na_if(8) %>% na_if(9))) %>%
mutate(across(c("behave", "secure", "safety", "tradition", "rules"), ~ 7 - .x ))
italy_data$auth <- scales::rescale(italy_data$behave +
italy_data$secure +
italy_data$safety +
italy_data$tradition +
italy_data$rules, to=c(0,100), na.rm=TRUE)
italy_data <- italy_data %>% filter(!is.na(auth))
table(italy_data$secure)
##
## 1 2 3 4 5 6
## 54 139 626 1856 2876 2881
table(italy_data$auth)
##
## 0 12 16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84
## 7 4 4 9 13 20 27 57 113 155 163 323 367 596 647 789 823 897 992 733
## 88 92 96 100
## 632 465 299 297
italy_data$year <- NA
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
for(i in 1:10){
italy_data$year[italy_data$essround == i] <- replacements[i]
}
auth_avg <- italy_data %>%
group_by(year) %>%
summarize(auth_avg = mean(auth, na.rm = TRUE))
plot_auth <- ggplot(auth_avg, aes(x = year, y = auth_avg)) +
geom_point(aes(color = auth_avg), alpha = 1.0) +
geom_line(aes(group = 1), color = "blue", linetype = "dashed") +
labs(title = "Authoritarian Scale Average by Year (Italy)",
x = "Survey Year",
y = "Authoritarian Scale Average") +
theme_minimal() +
theme(legend.position = "none") +
scale_y_continuous(limits = c(0, 100))
print(plot_auth)
The graph depicts a steady decrease, however a more drastic one just
after 2015.
Now run a linear regression model, using the modelsummary package, with authoritarian values as the outcome, and generations as the predictor/potential explanatory variable, and using the Italian data again. Rename the coefficients using the coef_rename function. Interpret the coefficients, whether they are statistically significant, and the adjusted R-squared.
Now visualize, using the modelplot() function from the modelsummary package, coefficient estimates and 95% confidence intervals for the following three models:
Model 1 = populist attitudes scale as the outcome; left/right as the single predictor (omitting moderates as we did in the tutorial), and using data for Greece.
Model 2 = populist attitudes scale as the outcome; gender as the single predictor, and using data for Greece.
Model 3 = populist attitudes scale as the outcome; generations as the single predictor (using the four generational category recode we did in the tutorial), and using data for Greece. Interpret.
greece_data <- read.fst("greece_data.fst")
greece_data$trstplt <- ifelse(greece_data$trstplt > 10, NA, greece_data$trstplt)
greece_data$trstprl <- ifelse(greece_data$trstprl > 10, NA, greece_data$trstprl)
greece_data$trstprt <- ifelse(greece_data$trstprt > 10, NA, greece_data$trstprt)
greece_data$trust <- scales::rescale(greece_data$trstplt + greece_data$trstprl + greece_data$trstprt, na.rm = TRUE, to = c(0, 100))
greece_data$populist <- scales::rescale(greece_data$trust, na.rm = TRUE, to=c(100,0))
model_data <- greece_data %>%
mutate(
# Recoding gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
lrscale = case_when(
lrscale %in% 0:4 ~ "Left", # This time: Values 0 to 3 in 'lrscale' are categorized as "Left"
lrscale %in% 6:10 ~ "Right", # This time: Values 6 to 10 in 'lrscale' are categorized as "Right"
lrscale %in% c(77, 88, 99) ~ NA_character_, # Values 77, 88, 99 in 'lrscale' are set as NA
TRUE ~ NA_character_ # Any other values not explicitly matched above
),
# Recoding cohort variable, setting years before 1930 and after 2000 to NA
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
# Recoding generational cohorts based on year of birth
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(cohort) # Keeping other values as character if they do not fit the ranges
),
# Converting cohort to a factor with labels
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
table(model_data$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 2978 3484 3498 2330
model1 <- lm(populist ~ lrscale, data = model_data)
model2 <- lm(populist ~ gndr, data = model_data)
model3 <- lm(populist ~ gen, data = model_data)
models <- list(
"Model 1" = lm(populist ~ lrscale, data = model_data),
"Model 2" = lm(populist ~ gndr, data = model_data),
"Model 3" = lm(populist ~ gen, data = model_data))
modelsummary(models, fmt = 1,
estimate = c("{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_rename = c("lrscale" = "Right", "gndrMale" = "Male", "genBaby Boomers" = "Baby Boomers", "genGen X" = "Gen X", "genMillennials" = "Millennials"),
title = 'Table 3. Regression models for populist attitudes')
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 72.0 (0.5)*** | 70.0 (0.3)*** | 65.8 (0.5)*** |
| lrscaleRight | −8.8 (0.6)*** | ||
| Male | −1.0 (0.5)* | ||
| Baby Boomers | 3.5 (0.7)*** | ||
| Gen X | 5.6 (0.7)*** | ||
| Millennials | 5.8 (0.7)*** | ||
| Num.Obs. | 4936 | 9823 | 9562 |
| R2 | 0.037 | 0.000 | 0.009 |
| R2 Adj. | 0.036 | 0.000 | 0.009 |
| AIC | 44723.8 | 88873.2 | 86473.9 |
| BIC | 44743.3 | 88894.8 | 86509.7 |
| Log.Lik. | −22358.894 | −44433.590 | −43231.944 |
| RMSE | 22.44 | 22.30 | 22.25 |
modelplot(models, coef_omit = 'Interc')
Based on the suggested findings, i demonstrates that there is a gender coeffiecent of -0.1 which shows that it is lower in comparison to women. The Generational differences scores close to one another but do have some differences, Baby Boomers scoring about a 4 on the scale, which is lower than both Millienals and GenX indicating that there is a lower populist. Lastly, the r-squared value of -9demonstrates that about 9% of teh Greek society have an authoritan ideology.