This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
packages <- c("tidyverse", "broom", "infer", "fst", "modelsummary", "effects", "survey", "interactions", "dplyr") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "broom" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "infer" "broom" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "fst" "infer" "broom" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[5]]
## [1] "modelsummary" "fst" "infer" "broom" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "broom" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "broom"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "interactions" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "broom" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "interactions" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "broom" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
rm(list=ls()); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2655419 141.9 4311488 230.3 NA 4311488 230.3
## Vcells 4519363 34.5 10146329 77.5 16384 7066400 54.0
ess <- read_fst("All-ESS-Data.fst")
##Task 1: Graph the histogram distribution using the populist scale (as we did in the tutorial) for the country of Sweden. What do you note?
sweden_data <- read_fst("sweden_data.fst")
sweden_data$trstplt <- ifelse(sweden_data$trstplt > 10, NA, sweden_data$trstplt)
sweden_data$trstprl <- ifelse(sweden_data$trstprl > 10, NA, sweden_data$trstprl)
sweden_data$trstprt <- ifelse(sweden_data$trstprt > 10, NA, sweden_data$trstprt)
sweden_data$trust <- scales::rescale(sweden_data$trstplt + sweden_data$trstprl + sweden_data$trstprt, na.rm = TRUE, to = c(0, 100))
sweden_data$populist <- scales::rescale(sweden_data$trust, na.rm = TRUE, to=c(100,0))
ggplot(sweden_data, aes(x = populist)) +
geom_histogram(bins = 30, fill = "blue", color = "black") +
theme_minimal() +
labs(title = "Distribution of Populist Scale for Sweden",
x = "Populist Scale",
y = "Count")
## Warning: Removed 2503 rows containing non-finite values (`stat_bin()`).
In the graph above, I see that the populist scale has a spike at the
50-point line that extends over the 2000 mark. The remaining portion of
the graph is curved; it begins at a low point of about 0 and rises to a
peak at 50, after which it falls until it reaches 100.
sweden_data <- sweden_data %>%
mutate(
ID = case_when(
lrscale %in% 0:4 ~ "BA or more", # This time: Values 0 to 3 in 'lrscale' are categorized as "Left"
lrscale %in% 6:10 ~ "No BA", # This time: Values 6 to 10 in 'lrscale' are categorized as "Right"
lrscale %in% c(77, 88, 99) ~ NA_character_, # Values 77, 88, 99 in 'lrscale' are set as NA
TRUE ~ NA_character_ # Any other values not explicitly matched above
)
)
table(sweden_data$ID)
##
## BA or more No BA
## 6188 7406
sweden_pop_model <- lm(populist ~ ID, data = sweden_data)
coefficients_sweden <- coef(sweden_pop_model)
print(coefficients_sweden)
## (Intercept) IDNo BA
## 48.3188406 -0.7149274
Intercept 48.3: The average score for which individuals in Sweden have a BA is indicated by the intercept of 48.3. There are more persons with these degrees or more if the intercept is greater. Sweden’s average score falls in the centre.
ID No BA Coefficient -0.71: Those without BAs have a coefficient that is approximately 0.71 points lower than that of BA holders. In other words, compared to those without BAs, persons with BAs have a greater average level of populist sentiments.
tidy(sweden_pop_model)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 48.3 0.265 183. 0
## 2 IDNo BA -0.715 0.356 -2.01 0.0444
With a value of 0.04, the p-value in this instance is not particularly high and, thus, has little relevance, according to the data set findings.
Model 1 = populist attitudes scale as the outcome; left/right as the single predictor (omitting moderates as we did in the tutorial), and using data for Greece.
Model 2 = populist attitudes scale as the outcome; gender as the single predictor, and using data for Greece.
Model 3 = populist attitudes scale as the outcome; generations as the single predictor (using the four generational category recode we did in the tutorial), and using data for Greece. Interpret.
greece_data <- read_fst("greece_data.fst")
greece_data <- greece_data %>%
mutate(
ID = case_when(
lrscale %in% 0:4 ~ "Left", # This time: Values 0 to 3 in 'lrscale' are categorized as "Left"
lrscale %in% 6:10 ~ "Right", # This time: Values 6 to 10 in 'lrscale' are categorized as "Right"
lrscale %in% c(77, 88, 99) ~ NA_character_, # Values 77, 88, 99 in 'lrscale' are set as NA
TRUE ~ NA_character_ # Any other values not explicitly matched above
)
)
table(greece_data$ID)
##
## Left Right
## 2688 3522
these results show that in greece people are more likely to be right leaning politically, with a higher populist attitudes.
model_data <- greece_data %>%
mutate(
# Recoding gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
# Recoding education
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA"
),
# Handle NAs for education levels
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
# Explicitly making 'No BA' the reference category
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
# Recoding age, setting 999 to NA
age = ifelse(agea == 999, NA, agea),
# Recoding cohort variable, setting years before 1930 and after 2000 to NA
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
# Recoding generational cohorts based on year of birth
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(cohort) # Keeping other values as character if they do not fit the ranges
),
# Converting cohort to a factor with labels
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
table(model_data$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 2978 3484 3498 2330
The graphs’ results indicate that, compared to previous generations, gen xers are more likely to have more populist views, with baby boomers following closely after.