# Packages this script depends on (extend the vector as needed)
packages <- c("tidyverse", "fst", "modelsummary")
# Anything not yet present in the local library is installed first
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Attach each package in turn; the list returned by lapply() is printed
lapply(packages, function(pkg) library(pkg, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'fst' was built under R version 4.3.2
## Warning: package 'modelsummary' was built under R version 4.3.2
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
# Read the combined ESS file into `ess` using fst::read_fst().
# NOTE(review): `ess` is not referenced again in the visible part of this
# script; confirm it is needed before paying the cost of loading it.
ess <- read_fst("All-ESS-Data.fst")
## Warning: package 'fstcore' was built under R version 4.3.2
Provide code and answer.
Prompt: in the tutorial, we calculated the average trust in others for France and visualized it. Using instead the variable ‘Trust in Parliament’ (trstplt) and the country of Spain (country file provided on course website), visualize the average trust by survey year. You can truncate the y-axis if you wish. Provide appropriate titles and labels given the changes. What are your main takeaways based on the visual (e.g., signs of increase, decrease, or stall)?
# Load the Spain extract
spain_data <- read_fst("spain_data.fst")
# Blank out the codes 77, 88 and 99 so they do not enter the averages
spain_data$trstplt[spain_data$trstplt %in% c(77, 88, 99)] <- NA
# Frequency of each remaining response value
table(spain_data$trstplt)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 5165 1830 2329 2441 2085 2890 1154 639 355 80 71
# Survey year for each ESS round: position i holds the year of round i
# (round 1 -> 2002, round 2 -> 2004, ..., round 10 -> 2020).
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
# Vectorised lookup instead of a loop over a hard-coded 1:10: match() returns
# the round's position in 1..length(replacements), so the mapping can never
# fall out of sync with the vector above. Rounds outside that range (or NA)
# get NA, exactly as the loop left them.
round_index <- match(spain_data$essround, seq_along(replacements))
spain_data$year <- replacements[round_index]
# Number of respondents per survey year
table(spain_data$year)
##
## 2002 2004 2006 2008 2010 2012 2014 2016 2018 2020
## 1729 1663 1876 2576 1885 1889 1925 1958 1668 2283
# One row per survey year holding the mean of trstplt, ignoring NAs
trust_by_year <- spain_data |>
  group_by(year) |>
  summarise(mean_trust = mean(trstplt, na.rm = TRUE))
# Show the yearly means
trust_by_year
## # A tibble: 10 × 2
## year mean_trust
## <dbl> <dbl>
## 1 2002 3.41
## 2 2004 3.66
## 3 2006 3.49
## 4 2008 3.32
## 5 2010 2.72
## 6 2012 1.91
## 7 2014 2.23
## 8 2016 2.40
## 9 2018 2.55
## 10 2020 1.94
# Line chart of the mean trust score per survey year for Spain.
# NOTE(review): in the ESS codebook trstplt appears to be "trust in
# politicians" (trstprl is parliament) -- confirm the variable/title pairing.
ggplot(trust_by_year, aes(x = year, y = mean_trust)) +
  # `linewidth` replaces the deprecated `size` for lines (ggplot2 >= 3.4.0)
  geom_line(color = "blue", linewidth = 1) +
  # Points mark each survey year's value; `size` is still correct for points
  geom_point(color = "red", size = 3) +
  labs(
    title = "Trust in Parliament in Spain (2002-2020)",
    x = "Survey Year",
    y = "Average Trust (0-10 scale)"
  ) +
  ylim(0, 10) + # y-axis spans 0 to 10, matching the axis label
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Ans: The main takeaway from this graph is that, although there is a slight
increase in trust between 2012 and 2018, there is an overall gradual
decrease in trust in parliament.
Provide answer only.
Prompt and question: Based on the figure we produced above called task2_plot, tell us: what are your main takeaways regarding France relative to Italy and Norway? Make sure to be concrete and highlight at least two important comparative trends visualized in the graph.
Ans: Compared to the figures for Italy and Norway, France has a smaller range of values, which indicates that most people are neither trustful nor distrustful. Instead, they are more neutral than people in countries like Italy and Norway.
Provide code and answer.
Question: What is the marginal percentage of Italian men who feel close to a particular political party?
# Load the Italy extract
italy_data <- read.fst("italy_data.fst")

# Recode the variables used below into readable labels.
italy_data <- italy_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_ # Anything that is not 1 or 2 becomes NA
    ),
    # Not needed for this question; kept so italy_data carries the same
    # recoded lrscale column as before.
    lrscale = case_when(
      lrscale %in% 0:3 ~ "Left", # Left-wing (0 to 3)
      lrscale %in% 7:10 ~ "Right", # Right-wing (7 to 10)
      TRUE ~ NA_character_ # Moderate (4, 5, 6) and other codes set to NA
    ),
    # The question is about feeling close to a party, i.e. clsprty, not
    # lrscale. Assumes the ESS coding 1 = yes, 2 = no, with 7/8/9 as
    # refusal / don't know / no answer -- TODO confirm against the codebook.
    clsprty = case_when(
      clsprty == 1 ~ "Yes",
      clsprty == 2 ~ "No",
      TRUE ~ NA_character_
    )
  )

# Share of all valid respondents in each gender x party-closeness cell.
# The row Male / Yes is the percentage the question asks for.
clsprty_percentages <- italy_data %>%
  filter(!is.na(clsprty), !is.na(gndr)) %>% # Drop rows missing either variable
  group_by(gndr, clsprty) %>%
  summarise(count = n(), .groups = "drop") %>% # Cell counts, ungrouped
  mutate(percentage = count / sum(count) * 100) # Cell count over grand total

clsprty_percentages
## # A tibble: 4 × 4
## gndr lrscale count percentage
## <chr> <chr> <int> <dbl>
## 1 Female Left 930 23.9
## 2 Female Right 955 24.5
## 3 Male Left 924 23.7
## 4 Male Right 1084 27.8
Ans: 23.7% of respondents are men who place themselves on the left, while 27.8% are men who place themselves on the right (percentages are of all respondents with a valid gender and left/right placement).
Provide code and output only.
Prompt: In the tutorial, we calculated then visualized the percentage distribution for left vs. right by gender for France. Your task is to replicate the second version of the visualization but for the country of Sweden instead.
# Load the Sweden extract
sweden_data <- read_fst("sweden_data.fst")
# Turn the numeric codes into labels
sweden_data <- sweden_data |>
  mutate(
    # Code 1 -> "Male", code 2 -> "Female"; any other value has no match -> NA
    gndr = c("Male", "Female")[match(gndr, c(1, 2))],
    # 0-3 -> "Left", 7-10 -> "Right"; 4-6 and every other code -> NA
    lrscale = case_when(
      lrscale %in% 0:3 ~ "Left",
      lrscale %in% 7:10 ~ "Right",
      .default = NA_character_
    )
  )
# Percentage of valid Swedish respondents in each gender x orientation cell
lrscale_percentages <- sweden_data |>
  # Keep only rows where both lrscale and gndr are present
  filter(!(is.na(lrscale) | is.na(gndr))) |>
  group_by(gndr, lrscale) |>
  # One row per cell with its respondent count; grouping is dropped afterwards
  summarise(count = n(), .groups = "drop") |>
  # prop.table() divides each count by the total of all counts
  mutate(percentage = prop.table(count) * 100)
# Print the resulting table
lrscale_percentages
## # A tibble: 4 × 4
## gndr lrscale count percentage
## <chr> <chr> <int> <dbl>
## 1 Female Left 2296 23.0
## 2 Female Right 2530 25.3
## 3 Male Left 2062 20.6
## 4 Male Right 3107 31.1
# Bar chart of the Left/Right percentages, one panel per gender
lrscale_plot <- ggplot(
  lrscale_percentages,
  aes(x = lrscale, y = percentage, fill = lrscale)
) +
  # geom_col() draws bars at the supplied y values, dodged side by side
  geom_col(position = "dodge") +
  # Panels share one y-axis (facet_wrap's default fixed scales)
  facet_wrap(vars(gndr)) +
  # Separate colours for Left and Right
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Political Orientation (Left vs. Right) by Gender in Sweden",
    x = "Political Orientation",
    y = "Percentage of Respondents",
    fill = "Orientation"
  ) +
  theme_minimal() +
  # Put the legend underneath the panels
  theme(legend.position = "bottom")
# Render the plot
lrscale_plot
Provide code and answer: In Hungary, what is the conditional probability of NOT feeling close to any particular party given that the person lives in a rural area?
# Load the Hungary extract
hungary_data <- read.fst("hungary_data.fst")

hungary_data <- hungary_data %>%
  mutate(
    # Collapse domicil into Urban (1, 2) vs Rural (3, 4, 5); 7/8/9 become NA
    geo = recode(as.character(domicil),
      "1" = "Urban",
      "2" = "Urban",
      "3" = "Rural",
      "4" = "Rural",
      "5" = "Rural",
      "7" = NA_character_,
      "8" = NA_character_,
      "9" = NA_character_
    ),
    # The question conditions party closeness (clsprty), not lrscale, on geo.
    # Assumes the ESS coding 1 = feels close to a party, 2 = does not, with
    # 7/8/9 as special codes -- TODO confirm against the codebook.
    clsprty = case_when(
      clsprty == 1 ~ "Yes",
      clsprty == 2 ~ "No",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(clsprty), !is.na(geo)) # Removing rows with NA in clsprty or geo

# Conditional probabilities P(clsprty | geo): within each geo group, each
# clsprty count divided by that group's total. The row clsprty == "No",
# geo == "Rural" answers the question.
cond <- hungary_data %>%
  count(clsprty, geo) %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n)) %>%
  ungroup() # Do not leave the result grouped for later steps

cond
## # A tibble: 28 × 4
## # Groups: geo [2]
## lrscale geo n prob
## <dbl> <chr> <int> <dbl>
## 1 0 Rural 314 0.0266
## 2 0 Urban 234 0.0483
## 3 1 Rural 233 0.0198
## 4 1 Urban 107 0.0221
## 5 2 Rural 469 0.0398
## 6 2 Urban 215 0.0444
## 7 3 Rural 637 0.0540
## 8 3 Urban 337 0.0696
## 9 4 Rural 632 0.0536
## 10 4 Urban 332 0.0686
## # ℹ 18 more rows
Ans: Given that someone resides in a rural area, the probability of them not feeling close to any particular party is 2.66%