Task 1

Provide code and answer.

Prompt: in the tutorial, we calculated the average trust in others for France and visualized it. Using instead the variable ‘Trust in Parliament’ (trstplt) and the country of Spain (country file provided on course website), visualize the average trust by survey year. You can truncate the y-axis if you wish. Provide appropriate titles and labels given the changes. What are your main takeaways based on the visual (e.g., signs of increase, decrease, or stall)?

# Packages this assignment depends on (extend the vector as needed)
packages <- c(
  "tidyverse",
  "fst",
  "modelsummary",
  "viridis"
)

# Work out which of them are absent from the local library and install those
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package in turn; the value returned by lapply() is printed
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "viridis"      "viridisLite"  "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"
# Load the Spanish survey extract and set the codes 77, 88 and 99 on the
# trust item to NA so they are left out of later summaries
spain_data <- read.fst("spain_data.fst") %>%
  mutate(trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt))

# Frequency table of the cleaned trust scores
table(spain_data$trstplt)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 5165 1830 2329 2441 2085 2890 1154  639  355   80   71
# Translate each survey round number (1-10) into its survey year.
# The lookup is vectorised: match() gives the position of every round within
# 1..10, and rounds outside that range (or missing) yield NA.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
spain_data$year <- replacements[
  match(spain_data$essround, seq_along(replacements))
]

# Number of respondents per survey year
table(spain_data$year)
## 
## 2002 2004 2006 2008 2010 2012 2014 2016 2018 2020 
## 1729 1663 1876 2576 1885 1889 1925 1958 1668 2283
# Mean trust score per survey year, ignoring missing responses
trstplt_by_year <- summarize(
  group_by(spain_data, year),
  mean_trstplt = mean(trstplt, na.rm = TRUE)
)

# Show the yearly means
trstplt_by_year
## # A tibble: 10 × 2
##     year mean_trstplt
##    <dbl>        <dbl>
##  1  2002         3.41
##  2  2004         3.66
##  3  2006         3.49
##  4  2008         3.32
##  5  2010         2.72
##  6  2012         1.91
##  7  2014         2.23
##  8  2016         2.40
##  9  2018         2.55
## 10  2020         1.94
# Line chart of the mean trust score by survey year for Spain, with a marker
# on every survey year. The y-axis covers the full 0-10 range.
ggplot(trstplt_by_year, aes(x = year, y = mean_trstplt)) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0
  geom_line(color = "blue", linewidth = 1) +  # Line to show the trend
  geom_point(color = "red", size = 3) +  # Points to highlight each year's value
  labs(title = "Trust in Parliament in Spain (2002-2020)",
       x = "Survey Year",
       y = "Average Trust (0-10 scale)") +
  ylim(0, 10) +  # Setting the y-axis limits from 0 to 10
  theme_minimal()

##Answer: Trust in parliament in Spain shows a slow overall decline. Between 2004 and 2012 there is a steep decline, followed by a slight uptick from 2012 to 2018. However, the overarching observation would be a period of stagnation.

Task 2

Provide answer only.

Prompt and question: Based on the figure we produced above called task2_plot, tell us: what are your main takeaways regarding France relative to Italy and Norway? Make sure to be concrete and highlight at least two important comparative trends visualized in the graph.

When contrasting Norway with France, it becomes apparent that France has a smaller proportion of individuals who express allegiance to a political party, yet it surpasses Italy in this regard. Notably, all three nations exhibit a downward trend, with France experiencing the most significant decline. In addition, Italy’s proportion is steadily approaching parity with France’s.

Task 3

Provide code and answer.

Question: What is the marginal percentage of Italian men who feel close to a particular political party?

# Read the Italian survey extract and turn the numeric gender and
# party-closeness codes into text labels
italy_data <- read.fst("italy_data.fst") %>%
  mutate(
    # 1 -> "Male", 2 -> "Female"; every other value becomes NA
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      .default = NA_character_
    ),
    # 1 -> "yes", 2 -> "no"; every other value becomes NA
    clsprty = case_when(
      clsprty == 1 ~ "yes",
      clsprty == 2 ~ "no",
      .default = NA_character_
    )
  )
# Share of respondents in every gender x party-closeness cell.
# Grouping is dropped before the percentage is computed, so each count is
# divided by the total across all four cells.
clsprty_percentages <- summarise(
  group_by(
    filter(italy_data, !(is.na(clsprty) | is.na(gndr))),
    gndr, clsprty
  ),
  count = n(),
  .groups = "drop"
) %>%
  mutate(percentage = count / sum(count) * 100)

# Show the resulting table
clsprty_percentages
## # A tibble: 4 × 4
##   gndr   clsprty count percentage
##   <chr>  <chr>   <int>      <dbl>
## 1 Female no       3228       34.2
## 2 Female yes      1686       17.9
## 3 Male   no       2593       27.5
## 4 Male   yes      1936       20.5

##Answer: Italian men who feel close to a particular political party make up 20.5% of all respondents in the table above.

Task 4

Provide code and output only.

Prompt: In the tutorial, we calculated then visualized the percentage distribution for left vs. right by gender for France. Your task is to replicate the second version of the visualization but for the country of Sweden instead.

# Read the Swedish survey extract, label gender, and collapse the left-right
# scale into two camps
sweden_data <- read.fst("sweden_data.fst") %>%
  mutate(
    # 1 -> "Male", 2 -> "Female"; every other value becomes NA
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      .default = NA_character_
    ),
    # 0-3 -> "Left", 7-10 -> "Right"; the middle values 4-6 and any other
    # code (e.g. 77, 88, 99) become NA
    lrscale = case_when(
      lrscale %in% 0:3 ~ "Left",
      lrscale %in% 7:10 ~ "Right",
      .default = NA_character_
    )
  )
# Share of respondents in every gender x left/right cell.
# Grouping is dropped before the percentage is computed, so each count is
# divided by the total across all four cells.
lrscale_percentages <- summarise(
  group_by(
    filter(sweden_data, !(is.na(lrscale) | is.na(gndr))),
    gndr, lrscale
  ),
  count = n(),
  .groups = "drop"
) %>%
  mutate(percentage = count / sum(count) * 100)

# Show the resulting table
lrscale_percentages
## # A tibble: 4 × 4
##   gndr   lrscale count percentage
##   <chr>  <chr>   <int>      <dbl>
## 1 Female Left     2296       23.0
## 2 Female Right    2530       25.3
## 3 Male   Left     2062       20.6
## 4 Male   Right    3107       31.1
# Display the gender x left/right percentage table that was built from
# `sweden_data` earlier in this task; it is the input to the plot below
lrscale_percentages
## # A tibble: 4 × 4
##   gndr   lrscale count percentage
##   <chr>  <chr>   <int>      <dbl>
## 1 Female Left     2296       23.0
## 2 Female Right    2530       25.3
## 3 Male   Left     2062       20.6
## 4 Male   Right    3107       31.1
# Bar chart of the left/right percentage distribution by gender for Sweden,
# one panel per political orientation
lrscale_plot_v2 <- ggplot(lrscale_percentages,
            aes(x = percentage,  # Use percentage directly
                y = reorder(gndr, -percentage),  # Order genders by percentage
                fill = gndr)) +  # Fill color based on Gender

  # Draw one bar per gender using the precomputed percentages
  geom_col() +
  # Swap the axes: percentage is mapped to x above, so after the flip it is
  # drawn on the vertical axis and gender on the horizontal axis
  coord_flip() +

  # Remove fill color legend
  guides(fill = "none") +  # Removes legend for the fill aesthetic

  # Split the plot based on Political Orientation
  facet_wrap(~ lrscale, nrow = 1) +  # Separate plots for Left/Right

  # Labels and titles for the plot
  labs(x = "Percentage of Respondents",  # Label for the percentage axis
       y = NULL,  # No label for the gender axis
       title = "Political Orientation by Gender",  # Main title
       subtitle = "Comparing the percentage distribution of left vs. right for Sweden (2002-2020)") +  # Subtitle

  # Adjust visual properties of the plot
  theme(plot.title = element_text(size = 16, face = "bold"),  # Format title
        plot.subtitle = element_text(size = 12),  # Format subtitle
        axis.title.y = element_blank(),  # Remove Y-axis title
        legend.position = "bottom")  # Legend position (fill legend is hidden above)

# Display the ggplot object
lrscale_plot_v2

Task 5

Provide code and answer: In Hungary, what is the conditional probability of NOT feeling close to any particular party given that the person lives in a rural area?

# Lookup from `domicil` code (as text) to area type:
# 1-2 -> "Urban", 3-5 -> "Rural", 7/8/9 -> NA
geo_labels <- c(
  "1" = "Urban", "2" = "Urban",
  "3" = "Rural", "4" = "Rural", "5" = "Rural",
  "7" = NA_character_, "8" = NA_character_, "9" = NA_character_
)

# Read the Hungarian survey extract, derive the urban/rural indicator, and
# keep only rows that have both a party-closeness value and an area type
hungary_data <- read.fst("hungary_data.fst") %>%
  mutate(geo = recode(as.character(domicil), !!!geo_labels)) %>%
  filter(!is.na(clsprty) & !is.na(geo))
# Label party closeness (1 -> "yes", 2 -> "no"); every other value turns
# into NA and those rows are then discarded
hungary_data <- filter(
  mutate(
    hungary_data,
    clsprty = case_when(
      clsprty == 1 ~ "yes",
      clsprty == 2 ~ "no",
      .default = NA_character_
    )
  ),
  !is.na(clsprty)
)
# Distribution of party closeness within each area type: every cell count is
# divided by the total count for the same `geo` value
cond <- hungary_data %>%
  group_by(clsprty, geo) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n))

# Show the table (it stays grouped by `geo`)
cond
## # A tibble: 4 × 4
## # Groups:   geo [2]
##   clsprty geo       n  prob
##   <chr>   <chr> <int> <dbl>
## 1 no      Rural  6275 0.554
## 2 no      Urban  2395 0.512
## 3 yes     Rural  5055 0.446
## 4 yes     Urban  2283 0.488

Answer: For individuals residing in rural areas of Hungary, there’s approximately a 55.38% chance that they don’t feel a strong affiliation with any specific political party.