#Installing & Applying Packages
packages <-c("tidyverse", "modelsummary", "flextable", "fst", "viridis", "knitr", "effects", "rmarkdown", "survey", "interactions", "infer", "broom")
new_packages <-packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Version 2.0.0 of `modelsummary`, to be released soon, will introduce a
## breaking change: The default table-drawing package will be `tinytable`
## instead of `kableExtra`. All currently supported table-drawing packages
## will continue to be supported for the foreseeable future, including
## `kableExtra`, `gt`, `huxtable`, `flextable, and `DT`.
##
## You can always call the `config_modelsummary()` function to change the
## default table-drawing package in persistent fashion. To try `tinytable`
## now:
##
## config_modelsummary(factory_default = 'tinytable')
##
## To set the default back to `kableExtra`:
##
## config_modelsummary(factory_default = 'kableExtra')
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
## Loading required package: viridisLite
##
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[3]]
## [1] "flextable" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "fst" "flextable" "modelsummary" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "viridis" "viridisLite" "fst" "flextable" "modelsummary"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "knitr" "viridis" "viridisLite" "fst" "flextable"
## [6] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "effects" "carData" "knitr" "viridis" "viridisLite"
## [6] "fst" "flextable" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "rmarkdown" "effects" "carData" "knitr" "viridis"
## [6] "viridisLite" "fst" "flextable" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[9]]
## [1] "survey" "survival" "Matrix" "grid" "rmarkdown"
## [6] "effects" "carData" "knitr" "viridis" "viridisLite"
## [11] "fst" "flextable" "modelsummary" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "interactions" "survey" "survival" "Matrix" "grid"
## [6] "rmarkdown" "effects" "carData" "knitr" "viridis"
## [11] "viridisLite" "fst" "flextable" "modelsummary" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[11]]
## [1] "infer" "interactions" "survey" "survival" "Matrix"
## [6] "grid" "rmarkdown" "effects" "carData" "knitr"
## [11] "viridis" "viridisLite" "fst" "flextable" "modelsummary"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
##
## [[12]]
## [1] "broom" "infer" "interactions" "survey" "survival"
## [6] "Matrix" "grid" "rmarkdown" "effects" "carData"
## [11] "knitr" "viridis" "viridisLite" "fst" "flextable"
## [16] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
#Import dataset
rm(list=ls()); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2572694 137.4 4470280 238.8 NA 4470280 238.8
## Vcells 4406487 33.7 10146329 77.5 16384 6654038 50.8
hungary_data <-read.fst("hungary_data.fst")
italy_data <- read.fst("italy_data.fst")
greece_data <-read.fst("greece_data.fst")
sweden_data<-read.fst("sweden_data.fst")
# Populist scale (Higher=anti-trust)
# Setting values greater than 10 to NA
sweden_data$trstplt <- ifelse(sweden_data$trstplt > 10, NA, sweden_data$trstplt)
sweden_data$trstprl <- ifelse(sweden_data$trstprl > 10, NA, sweden_data$trstprl)
sweden_data$trstprt <- ifelse(sweden_data$trstprt > 10, NA, sweden_data$trstprt)
# Creating and rescaling the trust variable
sweden_data$trust <-scales::rescale(sweden_data$trstplt + sweden_data$trstprl + sweden_data$trstprt, na.rm = TRUE, to=c(0,100))
#Rescale trust to create populist scale
sweden_data$populist <-scales::rescale(sweden_data$trust, na.rm=TRUE, to=c(100,0))
ggplot(sweden_data, aes(x=populist)) +
geom_histogram(bins=30, fill="hotpink", color="black") +
theme_minimal() +
labs(title="Distribution of Populist Scale for Sweden",
x="Populist Scale",
y="Count")
## Warning: Removed 2503 rows containing non-finite outside the scale range
## (`stat_bin()`).
# New dataframe for sweden data called dfs
dfs <- read.fst("sweden_data.fst")
dfs <- dfs %>%
mutate(
trstplt = ifelse(dfs$trstplt > 10, NA, dfs$trstplt),
trstprl = ifelse(dfs$trstprl > 10, NA, dfs$trstprl),
trstprt = ifelse(dfs$trstprt > 10, NA, dfs$trstprt))
# Creating and rescaling the trust variable
dfs$trust <-scales::rescale(dfs$trstplt + dfs$trstprl + dfs$trstprt, na.rm = TRUE, to=c(0,100))
# Rescale trust to create populist scale
dfs$populist <-scales::rescale(dfs$trust, na.rm=TRUE, to=c(100,0))
#Recoding education variable
dfs <- dfs %>%
mutate(
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA"),
#Recoding specific values to NA
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
#Converting educ.ba to a factor with specified levels
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")))
#Coefficients & Intercept
model<-lm(dfs$populist ~educ.ba, data = dfs)
coefficients_dfs <-coef(model)
print(coefficients_dfs)
## (Intercept) educ.baBA or more
## 51.358680 -7.868121
# tidy model table
tidy(model) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 51.359 | 0.181 | 283.363 | 0 |
| educ.baBA or more | -7.868 | 0.350 | -22.494 | 0 |
#new dataframe called dfg
dfg<-read.fst("greece_data.fst")
# Populist scale (Norris/Inglehart use low levels of trust in parliaments, politicians and parties as an indicator of populism). Higher=anti-trust
# Setting values greater than 10 to NA
dfg$trstplt <- ifelse(dfg$trstplt > 10, NA, dfg$trstplt)
dfg$trstprl <- ifelse(dfg$trstprl > 10, NA, dfg$trstprl)
dfg$trstprt <- ifelse(dfg$trstprt > 10, NA, dfg$trstprt)
# Creating and rescaling the trust variable
dfg$trust <-scales::rescale(dfg$trstplt + dfg$trstprl + dfg$trstprt, na.rm = TRUE, to=c(0,100))
#Rescale trust to create populist scale
dfg$populist <-scales::rescale(dfg$trust, na.rm=TRUE, to=c(100,0))
#Recoding lrscale, gndr, & gen variables
dfg<-dfg %>%
mutate(
lrscale = case_when(
lrscale %in% 0:4 ~ "Left",
lrscale %in% 6:10 ~ "Right", #Note: 5 set to NA because its "Moderate"
lrscale %in% c(77, 88, 99) ~NA_character_,
TRUE ~ NA_character_),
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)),
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen=case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(cohort)),
gen=factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")))%>%
filter(!is.na(lrscale) & !is.na(gndr))
table(dfg$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 1549 1847 1664 1036
#Running linear regression models
model1 <- lm(dfg$populist ~ lrscale, data = dfg)
model2 <- lm(dfg$populist ~ gndr, data = dfg)
model3 <- lm(dfg$populist ~ gen, data = dfg)
#Create models list to display all three models in the same table and later graph
models <- list(
"Model 1" = lm(dfg$populist ~ lrscale, data = dfg),
"Model 2" = lm(dfg$populist ~ gndr, data = dfg),
"Model 3" = lm(dfg$populist ~ gen, data = dfg))
#View models in a table
modelsummary(models, fmt = 1,
estimate = c("{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_rename = c("genBaby Boomers" = "Baby Boomers", "genGen X" = "Gen X", "genMillennials" = "Millennials", "gndrMale" = "Male", "lrscaleRight" = "Right"),
title = 'Regression models for populist attitudes for Greece')
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 72.0 (0.5)*** | 67.7 (0.5)*** | 63.0 (0.7)*** |
| Right | −8.8 (0.6)*** | ||
| Male | −1.2 (0.7)+ | ||
| Baby Boomers | 3.9 (0.9)*** | ||
| Gen X | 6.8 (0.9)*** | ||
| Millennials | 6.4 (1.0)*** | ||
| Num.Obs. | 4936 | 4936 | 4822 |
| R2 | 0.037 | 0.001 | 0.013 |
| R2 Adj. | 0.036 | 0.000 | 0.012 |
| AIC | 44723.8 | 44904.5 | 43832.6 |
| BIC | 44743.3 | 44924.1 | 43865.0 |
| Log.Lik. | −22358.894 | −22449.275 | −21911.303 |
| RMSE | 22.44 | 22.85 | 22.76 |
#Visualize the coefficients & 95% confidence interval
modelplot(models, coef_omit = 'Interc')