European Social Survey (ESS) Data Visualization Practice Exercises

Initial set-up

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fst)

# Load the French ESS extract from the fst file in the working directory
france_data <- read_fst(path = "france_data.fst")

Exercise 1: Basic Bar Plot

Task: Create a simple visualization of French respondents’ views on whether the government should reduce income differences (gincdif).

Requirements:

  1. Clean the gincdif variable by:

    • Removing missing values (7, 8)

    • Converting numeric codes (1-5) to meaningful labels

    • Creating an appropriate factor with ordered levels

  2. Create a basic vertical bar plot showing the count for each response category

  3. No customization needed - just the default ggplot appearance

Solution:

# Clean and prepare gincdif data
#
# Keep only the five substantive answer codes (1-5). Excluding just 7 and 8
# would let any other non-substantive value (e.g. NA or a further missing
# code such as 9) through; case_when() below would turn it into NA and
# geom_bar() would then draw a stray "NA" bar in the plot.
france_gincdif <- france_data %>%
  filter(gincdif %in% 1:5) %>%
  mutate(
    # Translate the numeric codes into readable labels
    income_view = case_when(
      gincdif == 1 ~ "Strongly agree",
      gincdif == 2 ~ "Agree",
      gincdif == 3 ~ "Neither agree nor disagree",
      gincdif == 4 ~ "Disagree",
      gincdif == 5 ~ "Strongly disagree",
      TRUE ~ NA_character_
    ),
    # Fix the level order so the bars follow the agree -> disagree scale
    # instead of alphabetical order
    income_view = factor(
      income_view,
      levels = c(
        "Strongly agree", "Agree",
        "Neither agree nor disagree",
        "Disagree", "Strongly disagree"
      )
    )
  )

# Default-styled bar chart: one bar per response category, height = count
france_gincdif %>%
  ggplot(mapping = aes(x = income_view)) +
  geom_bar() +
  labs(
    title = "Views on Government Reducing Income Differences",
    x = "Response",
    y = "Count"
  )

Exercise 2: Adding Essential Customization

Task: Build a more informative visualization of satisfaction with health services (stfhlth).

Requirements:

  1. Clean the stfhlth variable by removing missing values

  2. Create a histogram with:

    • A meaningful title

    • Clear axis labels

    • A single color for all bars

    • The minimal theme

Solution:

# Keep only rows whose stfhlth lies on the 0-10 scale; values outside that
# range (and NA) are dropped
france_health <- france_data %>%
  filter(stfhlth >= 0, stfhlth <= 10)

# Histogram of health-service satisfaction with one bin per scale point
france_health %>%
  ggplot(mapping = aes(x = stfhlth)) +
  geom_histogram(
    binwidth = 1,        # one bin per point on the 0-10 scale
    fill = "steelblue",  # same fill for every bar
    color = "white"      # white outline separates neighbouring bars
  ) +
  labs(
    x = "Satisfaction Level (0 = Extremely Bad, 10 = Extremely Good)",
    y = "Number of Respondents",
    title = "Satisfaction with Health Services in France"
  ) +
  theme_minimal()

Exercise 3: Working with Proportions

Task: Create a proportional visualization of satisfaction with the education system (stfedu).

Requirements:

  1. Clean the stfedu variable

  2. Create a histogram showing proportions instead of counts

  3. Add clear title, subtitle, and professional color choice

Solution:

# Keep only rows whose stfedu lies on the 0-10 scale; values outside that
# range (and NA) are dropped
france_edu <- france_data %>%
  filter(stfedu >= 0, stfedu <= 10)

# Number of valid responses; used as the denominator when converting bin
# counts to proportions
total_responses <- nrow(france_edu)

# Create proportional histogram
ggplot(france_edu, aes(x = stfedu)) +
  geom_histogram(
    # after_stat() replaces the `..count..` notation deprecated in
    # ggplot2 3.4.0; dividing each bin count by the number of valid
    # responses converts counts to proportions
    aes(y = after_stat(count / total_responses)),
    binwidth = 1,
    fill = "#4B9CD3",    # Professional blue shade
    color = "white"      # White borders
  ) +
  # Format y-axis as percentage (multiply by 100)
  scale_y_continuous(
    labels = function(x) paste0(round(x * 100, 1), "%"),
    breaks = seq(0, 0.25, 0.05)  # Tick marks every 5% from 0% to 25%
  ) +
  labs(
    title = "Satisfaction with Education System in France",
    subtitle = "Distribution of Responses on 0-10 Scale",
    x = "Satisfaction Level (0 = Extremely Bad, 10 = Extremely Good)",
    y = "Percentage of Respondents"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40"),
    panel.grid.minor = element_blank()
  )

Exercise 4: Polished Visualization

Task: Create a polished visualization examining income inequality views (gincdif) by urban/rural location.

Requirements:

  1. Prepare two variables:

    • Clean gincdif as in Exercise 1

    • Create urban/rural categories from domicil (1-3: Urban, 4-5: Rural)

  2. Create a horizontal bar plot comparing urban/rural responses

Solution:

# Build the analysis data: labelled income views plus an urban/rural flag
france_inequality <- france_data %>%
  mutate(
    # Label codes 1-5; anything unmatched falls through to NA. The explicit
    # levels keep the agree -> disagree ordering.
    income_view = factor(
      case_when(
        gincdif == 1 ~ "Strongly agree",
        gincdif == 2 ~ "Agree",
        gincdif == 3 ~ "Neither agree nor disagree",
        gincdif == 4 ~ "Disagree",
        gincdif == 5 ~ "Strongly disagree"
      ),
      levels = c(
        "Strongly agree", "Agree",
        "Neither agree nor disagree",
        "Disagree", "Strongly disagree"
      )
    ),
    # Collapse domicil: 1-3 -> Urban, 4-5 -> Rural, everything else -> NA
    location = factor(
      case_when(
        domicil %in% 1:3 ~ "Urban",
        domicil %in% 4:5 ~ "Rural"
      )
    )
  ) %>%
  # Drop respondents missing either derived variable
  drop_na(income_view, location)
# Horizontal dodged bar chart: response counts side by side for each location
france_inequality %>%
  ggplot(mapping = aes(y = income_view, fill = location)) +
  geom_bar(color = "white", position = "dodge") +
  scale_fill_manual(
    name = "Location",
    values = c("Urban" = "#56B4E9", "Rural" = "#E69F00")
  ) +
  labs(
    title = "Views on Income Inequality by Location",
    subtitle = "Government should reduce differences in income levels",
    x = "Number of Respondents",
    y = NULL  # category labels on the axis speak for themselves
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40"),
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank()
  )