Initial set-up
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fst)
# Load the ESS extract stored in france_data.fst into a data frame
france_data <- read_fst(path = "france_data.fst")
Task: Create a simple visualization of French respondents’ views on whether
the government should reduce income differences (`gincdif`).
Requirements:
Clean the `gincdif` variable by:
Removing missing values (7, 8)
Converting numeric codes (1-5) to meaningful labels
Creating an appropriate factor with ordered levels
Create a basic vertical bar plot showing the count for each response category
No customization needed - just the default ggplot appearance
Solution:
# Clean and prepare gincdif data.
# Result: france_gincdif, a copy of france_data restricted to rows with a valid
# 1-5 answer, plus an `income_view` factor whose levels run from
# "Strongly agree" to "Strongly disagree".
france_gincdif <- france_data %>%
  # Drop the missing-value codes named in the task
  filter(!gincdif %in% c(7, 8)) %>%
  mutate(
    # Map numeric codes 1-5 to labels; anything else becomes NA
    income_view = case_when(
      gincdif == 1 ~ "Strongly agree",
      gincdif == 2 ~ "Agree",
      gincdif == 3 ~ "Neither agree nor disagree",
      gincdif == 4 ~ "Disagree",
      gincdif == 5 ~ "Strongly disagree",
      TRUE ~ NA_character_
    ),
    # Fix the level order so bars follow the response scale, not the alphabet
    income_view = factor(
      income_view,
      levels = c(
        "Strongly agree", "Agree",
        "Neither agree nor disagree",
        "Disagree", "Strongly disagree"
      )
    )
  ) %>%
  # Remove rows the recode could not label (any remaining code outside 1-5,
  # or NA in gincdif); otherwise they would be plotted as an extra "NA" bar
  filter(!is.na(income_view))
# Default-themed vertical bar chart: one bar per response category,
# bar height = number of rows in that category
france_gincdif %>%
  ggplot(mapping = aes(x = income_view)) +
  geom_bar() +
  xlab("Response") +
  ylab("Count") +
  ggtitle("Views on Government Reducing Income Differences")
Task: Build a more informative visualization of satisfaction with health
services (`stfhlth`).
Requirements:
Clean the `stfhlth` variable by removing missing values
Create a histogram with:
A meaningful title
Clear axis labels
A single color for all bars
The minimal theme
Solution:
# Prepare stfhlth: keep only rows whose answer lies on the 0-10 scale.
# filter() combines the two conditions with AND and drops rows where either
# comparison is NA, so values outside the range and NAs are both removed.
france_health <- france_data %>%
  filter(stfhlth >= 0, stfhlth <= 10)
# Histogram of health-service satisfaction: one-unit-wide bins, a single fill
# colour with white outlines between bars, and the minimal theme
france_health %>%
  ggplot(mapping = aes(x = stfhlth)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
  labs(
    x = "Satisfaction Level (0 = Extremely Bad, 10 = Extremely Good)",
    y = "Number of Respondents",
    title = "Satisfaction with Health Services in France"
  ) +
  theme_minimal()
Task: Create a proportional visualization of satisfaction with the education
system (`stfedu`).
Requirements:
Clean the `stfedu` variable
Create a histogram showing proportions instead of counts
Add clear title, subtitle, and professional color choice
Solution:
# Prepare stfedu: keep only rows whose answer lies on the 0-10 scale
# (rows outside the range, or with NA, are dropped by filter())
france_edu <- france_data %>%
  filter(stfedu >= 0, stfedu <= 10)

# Number of valid responses; used as the denominator when bar heights are
# converted from counts to proportions
total_responses <- nrow(france_edu)
# Create proportional histogram of education-system satisfaction.
# Each bar's height is its bin count divided by total_responses (the row count
# of france_edu), so the y-axis shows shares of respondents, not raw counts.
ggplot(france_edu, aes(x = stfedu)) +
  geom_histogram(
    # after_stat() is the supported replacement for the `..count..` notation,
    # which was deprecated in ggplot2 3.4.0 and raised a lifecycle warning
    aes(y = after_stat(count / total_responses)),
    binwidth = 1,
    fill = "#4B9CD3", # Professional blue shade
    color = "white" # White borders
  ) +
  # Show the proportions as percentages (multiply by 100, one decimal place)
  # NOTE(review): breaks stop at 25%; a bar taller than that would extend past
  # the last labelled tick -- widen the sequence if the data require it
  scale_y_continuous(
    labels = function(x) paste0(round(x * 100, 1), "%"),
    breaks = seq(0, 0.25, 0.05)
  ) +
  labs(
    title = "Satisfaction with Education System in France",
    subtitle = "Distribution of Responses on 0-10 Scale",
    x = "Satisfaction Level (0 = Extremely Bad, 10 = Extremely Good)",
    y = "Percentage of Respondents"
  ) +
  theme_minimal() +
  # Applied after theme_minimal() so these overrides are not reset by it
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40"),
    panel.grid.minor = element_blank()
  )
Task: Create a polished visualization examining income inequality views
(`gincdif`) by urban/rural location.
Requirements:
Prepare two variables:
Clean `gincdif` as in Exercise 1
Create urban/rural categories from `domicil` (1-3: Urban, 4-5: Rural)
Create a horizontal bar plot comparing urban/rural responses
Solution:
# Build the analysis frame for the urban/rural comparison.
# Adds two factors to france_data and keeps only rows where both are known:
#   income_view - gincdif codes 1-5 as labels, levels in scale order
#   location    - domicil 1-3 as "Urban", 4-5 as "Rural"
france_inequality <- france_data %>%
  mutate(
    # Recode gincdif to labels; codes outside 1-5 fall through to NA
    income_view = factor(
      case_when(
        gincdif == 1 ~ "Strongly agree",
        gincdif == 2 ~ "Agree",
        gincdif == 3 ~ "Neither agree nor disagree",
        gincdif == 4 ~ "Disagree",
        gincdif == 5 ~ "Strongly disagree",
        .default = NA_character_
      ),
      levels = c(
        "Strongly agree", "Agree",
        "Neither agree nor disagree",
        "Disagree", "Strongly disagree"
      )
    ),
    # Collapse domicil into two groups; codes outside 1-5 fall through to NA
    location = factor(
      case_when(
        domicil %in% 1:3 ~ "Urban",
        domicil %in% 4:5 ~ "Rural",
        .default = NA_character_
      )
    )
  ) %>%
  # Discard rows where either derived variable could not be assigned
  drop_na(income_view, location)
# Horizontal grouped bar chart: response categories on the y-axis, with
# side-by-side (dodged) bars for Rural and Urban respondent counts
france_inequality %>%
  ggplot(mapping = aes(y = income_view, fill = location)) +
  geom_bar(position = "dodge", color = "white") +
  scale_fill_manual(
    name = "Location",
    values = c("Rural" = "#E69F00", "Urban" = "#56B4E9")
  ) +
  labs(
    title = "Views on Income Inequality by Location",
    subtitle = "Government should reduce differences in income levels",
    x = "Number of Respondents",
    y = NULL # Category labels speak for themselves, so no axis title
  ) +
  theme_minimal() +
  # Overrides come after theme_minimal() so they are not reset by it
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank(),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40")
  )