# Packages this analysis depends on (extend the vector as needed)
packages <- c("tidyverse", "fst", "modelsummary", "viridis")

# Anything in `packages` that is absent from the local library gets installed
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package; names are passed as strings, hence character.only
lapply(packages, function(pkg) library(pkg, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "viridis" "viridisLite" "modelsummary" "fst" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
Provide code and answer.
Prompt: in the tutorial, we calculated the average trust in others for France and visualized it. Using instead the variable ‘Trust in Parliament’ (trstplt) and the country of Spain (country file provided on course website), visualize the average trust by survey year. You can truncate the y-axis if you wish. Provide appropriate titles and labels given the changes. What are your main takeaways based on the visual (e.g., signs of increase, decrease, or stall)?
# Read the Spain country file and blank out the special response codes
# 77, 88 and 99 in trstplt so they do not enter later calculations.
spain_data <- read.fst("spain_data.fst") %>%
  mutate(
    trstplt = ifelse(!(trstplt %in% c(77, 88, 99)), trstplt, NA)
  )

# Frequency table of the remaining trstplt values
table(spain_data[["trstplt"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 5165 1830 2329 2441 2085 2890 1154 639 355 80 71
# Survey year for each value of essround: position k holds the year assigned
# to essround == k.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)

# Vectorised lookup instead of a hard-coded `for (i in 1:10)` loop, so the
# number of rounds is taken from `replacements` itself. match() yields NA for
# any essround value without an entry, leaving `year` as NA for those rows.
spain_data$year <- replacements[
  match(spain_data$essround, seq_along(replacements))
]

# Average trust in parliament per survey year; NA responses are ignored.
trust_by_year <- spain_data %>%
  group_by(year) %>%
  summarize(mean_trust = mean(trstplt, na.rm = TRUE))
trust_by_year
## # A tibble: 10 × 2
## year mean_trust
## <dbl> <dbl>
## 1 2002 3.41
## 2 2004 3.66
## 3 2006 3.49
## 4 2008 3.32
## 5 2010 2.72
## 6 2012 1.91
## 7 2014 2.23
## 8 2016 2.40
## 9 2018 2.55
## 10 2020 1.94
# Line chart of Spain's average trust in parliament (trstplt) by survey year.
ggplot(trust_by_year, aes(x = year, y = mean_trust)) +
  # Line shows the trend; `linewidth` replaces `size` for lines, which is
  # deprecated since ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 1) +
  # Points highlight each survey year's value
  geom_point(color = "red", size = 4) +
  labs(
    title = "Trust in Parliament in Spain (2002-2020)",
    x = "Survey Year",
    y = "Average Trust (0-10 scale)"
  ) +
  # The item is measured on 0-10, but the y-axis is truncated to 0-5 because
  # every yearly mean lies below 5
  ylim(0, 5) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The graph illustrates that trust in parliament is generally low, with
every yearly average falling below 5. The year 2004 has the highest
value, 3.66, signifying the highest level of trust Spain has had in its
parliament over the years. Following this, there was a steady decline in
trust, which bottomed out in 2012 at 1.91, the lowest average level of
trust in Spain’s parliament recorded in the data. After that, trust
slightly increased until 2018, after which it declined again and reached
its second-lowest point in 2020 at 1.94.
The primary conclusion drawn from the output values is that, on the whole, public trust in parliament has declined, albeit with some yearly fluctuations.
Provide answer only.
Prompt and question: Based on the figure we produced above called task2_plot, tell us: what are your main takeaways regarding France relative to Italy and Norway? Make sure to be concrete and highlight at least two important comparative trends visualized in the graph.
Answer: The share of respondents who feel close to a party decreases gradually across cohorts, showing a negative correlation in all three of the countries on task2_plot. Throughout the 1920–2000 cohorts, Norway had the highest value, France was in the middle, and Italy had the lowest value. France, Italy, and Norway have all fallen, but their relative ordering has remained the same.
Furthermore, at the 1920 cohort France sat at a roughly similar distance from both Norway and Italy. However, by the 2000 cohort, the difference in value between France and Norway is much larger, with France having a value above 0.50 and Italy having a value much closer to 0.25.
Provide code and answer.
Question: What is the marginal percentage of Italian men who feel close to a particular political party?
# Load the Italy country file and recode the two variables needed to answer
# "what share of respondents are men who feel close to a particular party?".
italy_data <- read.fst("italy_data.fst")
italy_data <- italy_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_ # anything that is not 1 or 2 becomes NA
    ),
    # Party closeness is clsprty (1 = yes, 2 = no). The earlier version recoded
    # lrscale here, which is the left-right placement scale and does not
    # measure closeness to a party.
    # Assumes italy_data carries clsprty like the other country files.
    clsprty = case_when(
      clsprty == 1 ~ "yes",
      clsprty == 2 ~ "no",
      TRUE ~ NA_character_ # special codes and non-responses become NA
    )
  )

# Percentage of every gender x closeness cell, out of all respondents with a
# valid answer on both variables. The object keeps its original name so later
# references to `lrscale_percentages` still resolve.
# NOTE(review): the printed table and written answer that follow this chunk
# were produced with the lrscale recode; re-run and refresh them.
lrscale_percentages <- italy_data %>%
  filter(!is.na(clsprty), !is.na(gndr)) %>% # drop rows missing either variable
  group_by(gndr, clsprty) %>%               # one group per gender x closeness cell
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = count / sum(count) * 100)
lrscale_percentages
## # A tibble: 4 × 4
## gndr lrscale count percentage
## <chr> <chr> <int> <dbl>
## 1 Female no 225 33.7
## 2 Female yes 107 16.0
## 3 Male no 248 37.1
## 4 Male yes 88 13.2
The Survey Data’s Marginal Percentages: The percentage of Italian men who feel close to a particular political party is 13.2%.
Provide code and output only.
Prompt: In the tutorial, we calculated then visualized the percentage distribution for left vs. right by gender for France. Your task is to replicate the second version of the visualization but for the country of Sweden instead.
# Read the Sweden country file, then label gender and collapse the left-right
# scale into two camps. Values outside the listed codes become NA.
sweden_data <- read.fst("sweden_data.fst") %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    lrscale = case_when(
      lrscale %in% c(0, 1, 2, 3) ~ "Left",
      lrscale %in% c(7, 8, 9, 10) ~ "Right",
      TRUE ~ NA_character_
    )
  )
# Count respondents in each gender x orientation cell (rows missing either
# variable are excluded) and express each count as a percentage of the total.
lrscale_percentagesxx <- sweden_data %>%
  filter(!(is.na(lrscale) | is.na(gndr))) %>%
  count(gndr, lrscale, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)
# Bar chart of the Sweden percentages: one panel per orientation (Left/Right),
# one bar per gender, with the fill legend suppressed.
lrscale_plot_v2 <- ggplot(
  data = lrscale_percentagesxx,
  mapping = aes(
    x = percentage,
    y = reorder(gndr, -percentage),
    fill = gndr
  )
) +
  geom_col() +
  coord_flip() +
  facet_wrap(~ lrscale, nrow = 1) +
  guides(fill = "none") +
  labs(
    title = "Political Orientation by Gender",
    subtitle = "Comparing the percentage distribution of left vs. right for Sweden (2002-2020)",
    x = "Percentage of Respondents",
    y = NULL
  ) +
  theme(
    legend.position = "bottom",
    axis.title.y = element_blank(),
    plot.subtitle = element_text(size = 12),
    plot.title = element_text(size = 16, face = "bold")
  )

# Render the plot
lrscale_plot_v2
More males are right-leaning politically than left-leaning, and more so
than females.

## Task 5
Provide code and answer: In Hungary, what is the conditional probability of NOT feeling close to any particular party given that the person lives in a rural area?
# Read the Hungary country file and derive `geo` from domicil:
# codes 1-2 -> "Urban", 3-5 -> "Rural", 7-9 -> NA; any other value is kept
# as its character form. Rows missing clsprty or geo are then removed.
hungary_data <- read.fst("hungary_data.fst") %>%
  mutate(
    geo = case_when(
      as.character(domicil) %in% c("1", "2") ~ "Urban",
      as.character(domicil) %in% c("3", "4", "5") ~ "Rural",
      as.character(domicil) %in% c("7", "8", "9") ~ NA_character_,
      TRUE ~ as.character(domicil)
    )
  ) %>%
  filter(!(is.na(clsprty) | is.na(geo)))
# Label party closeness: 1 -> "Yes", 2 -> "No"; every other value (including
# NA) maps to NA and is dropped by the filter that follows.
hungary_data <- hungary_data %>%
  mutate(clsprty = c("Yes", "No")[match(clsprty, c(1, 2))]) %>%
  filter(!is.na(clsprty))
# Conditional distribution of party closeness given area type: cell counts
# divided by the total of their `geo` group. The result stays grouped by geo.
cond <- hungary_data %>%
  group_by(clsprty, geo) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n))
cond
## # A tibble: 4 × 4
## # Groups: geo [2]
## clsprty geo n prob
## <chr> <chr> <int> <dbl>
## 1 No Rural 6275 0.554
## 2 No Urban 2395 0.512
## 3 Yes Rural 5055 0.446
## 4 Yes Urban 2283 0.488
In Hungary, if a person lives in a rural location, the conditional probability that they do not feel close to any particular party is roughly 55.38%.