knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

# Load required packages
packages <- c(
  "tidyverse",
  "gt",
  "gapminder",
  "srvyr",
  "srvyrexploR",
  "fst",
  "ggridges"
)

# Install if needed and load
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "srvyrexploR" "srvyr"       "gapminder"   "gt"          "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[6]]
##  [1] "fst"         "srvyrexploR" "srvyr"       "gapminder"   "gt"         
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[7]]
##  [1] "ggridges"    "fst"         "srvyrexploR" "srvyr"       "gapminder"  
##  [6] "gt"          "lubridate"   "forcats"     "stringr"     "dplyr"      
## [11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [16] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [21] "datasets"    "methods"     "base"

Task 1: Global Life Expectancy Changes

A. Data Manipulation

life_exp_summary <- gapminder %>%
  filter(year %in% c(1987, 2007)) %>%  
  group_by(continent, country) %>%
  summarize(
    start_life = first(lifeExp),
    end_life = last(lifeExp),
    change = end_life - start_life,
    avg_life = mean(lifeExp),  
    .groups = "drop"
  ) %>%
  arrange(avg_life)
life_exp_summary
## # A tibble: 142 × 6
##    continent country       start_life end_life  change avg_life
##    <fct>     <fct>              <dbl>    <dbl>   <dbl>    <dbl>
##  1 Africa    Sierra Leone        40.0     42.6  2.56       41.3
##  2 Africa    Angola              39.9     42.7  2.83       41.3
##  3 Asia      Afghanistan         40.8     43.8  3.01       42.3
##  4 Africa    Mozambique          42.9     42.1 -0.779      42.5
##  5 Africa    Guinea-Bissau       41.2     46.4  5.14       43.8
##  6 Africa    Rwanda              44.0     46.2  2.22       45.1
##  7 Africa    Liberia             46.0     45.7 -0.349      45.9
##  8 Africa    Somalia             44.5     48.2  3.66       46.3
##  9 Africa    Zambia              50.8     42.4 -8.44       46.6
## 10 Africa    Nigeria             46.9     46.9 -0.0270     46.9
## # ℹ 132 more rows
key_cases <- life_exp_summary %>%
  filter(country %in% c("Niger", "Bangladesh", 
                       "El Salvador", "Iraq", "Zimbabwe"))
print(key_cases)
## # A tibble: 5 × 6
##   continent country     start_life end_life change avg_life
##   <fct>     <fct>            <dbl>    <dbl>  <dbl>    <dbl>
## 1 Africa    Niger             44.6     56.9  12.3      50.7
## 2 Africa    Zimbabwe          62.4     43.5 -18.9      52.9
## 3 Asia      Bangladesh        52.8     64.1  11.2      58.4
## 4 Asia      Iraq              65.0     59.5  -5.50     62.3
## 5 Americas  El Salvador       63.2     71.9   8.72     67.5

B. Table Creation

life_exp_table <- gapminder %>%
  filter(year %in% c(1987, 2007)) %>%
  group_by(continent, year) %>%
  summarize(
    mean_life = mean(lifeExp),
    n = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = continent,
    names_from = year,
    values_from = mean_life,
    names_prefix = "mean_"
  ) %>%
  mutate(change = mean_2007 - mean_1987) %>%
  arrange(desc(change))

life_exp_table %>%
  gt() %>%
  fmt_number(
    columns = c(mean_1987, mean_2007, change),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Life Expectancy Changes by Continent**"),
    subtitle = "Average life expectancy in years"
  ) %>%
  cols_label(
    continent = md("**Continent**"),
    mean_1987 = md("**1987**"),
    mean_2007 = md("**2007**"),
    change = md("**Change**")
  ) %>%
  tab_source_note(
    source_note = "Data: Gapminder"
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )
Life Expectancy Changes by Continent
Average life expectancy in years
Continent 1987 2007 Change
Asia 64.9 70.7 5.9
Americas 68.1 73.6 5.5
Oceania 75.3 80.7 5.4
Europe 73.6 77.6 4.0
Africa 53.3 54.8 1.5
Data: Gapminder

C. Data Visualization

gapminder %>%
  filter(
    year >= 1987, 
    year <= 2007,
    country %in% c("Niger", "Bangladesh", "El Salvador", 
                   "Iraq", "Zimbabwe")
  ) %>%
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_line(size = 1.5) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)",
    color = "Country"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    # Add professional polish
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 9, color = "gray30"),
    panel.grid.major = element_line(color = "gray90")
  )

D. Interpretation

Generally to discuss and highlight clear regional disparity with Africa:

  • Asia and the Americas saw the largest gains, with increases of 5.9 and 5.5 years respectively

  • Despite high initial levels, Oceania continued to improve with a 5.4 year increase

  • Europe, starting from a high base of 73.6 years, still gained 4.0 years

  • Most concerningly, Africa showed minimal progress with only a 1.5 year increase, starting from and remaining at dramatically lower levels (53.3 to 54.8 years)

This pattern highlights a concerning trend: regions starting with higher life expectancy generally saw larger absolute gains, while Africa, beginning with the lowest life expectancy, experienced the smallest improvement.

For the visualization for selected countries, to highlight the different lige expectancy trajectories and notably the Niger/Zimbabwe contrast (so to showcase within-region variation despite the overall regional trend highlighted above):

For instance, discussing how: Niger shows a remarkable positive transformation, with life expectancy increasing by 12.3 years (from 44.6 to 56.9 years). This represents one of the largest improvements globally during this period, though starting from a very low base. In contrast, Zimbabwe experienced a devastating decline of 18.9 years (from 62.4 to 43.5 years), one of the steepest drops in life expectancy recorded in recent history, likely reflecting the combined impact of the HIV/AIDS epidemic and economic crisis. Bangladesh and El Salvador demonstrate steady progress from different starting points. Bangladesh achieved substantial gains of 11.2 years (from 52.8 to 64.1 years), while El Salvador saw more moderate but consistent improvement of 8.7 years (from 63.2 to 71.9 years). Iraq, however, illustrates the impact of political instability and conflict, with life expectancy declining by 5.5 years (from 65.0 to 59.5 years).

Task 2: Interpersonal Trust Across Age Groups

A. Data Manipulation

trust_by_age <- anes_2020 %>%
  filter(!is.na(TrustPeople), !is.na(AgeGroup)) %>%
  group_by(AgeGroup) %>%
  count(TrustPeople) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100
  ) %>%
  ungroup()

trust_by_age
## # A tibble: 30 × 5
##    AgeGroup TrustPeople             n total percent
##    <fct>    <fct>               <int> <int>   <dbl>
##  1 18-29    Always                  7   871   0.804
##  2 18-29    Most of the time      268   871  30.8  
##  3 18-29    About half the time   278   871  31.9  
##  4 18-29    Some of the time      246   871  28.2  
##  5 18-29    Never                  72   871   8.27 
##  6 30-39    Always                 10  1239   0.807
##  7 30-39    Most of the time      502  1239  40.5  
##  8 30-39    About half the time   378  1239  30.5  
##  9 30-39    Some of the time      281  1239  22.7  
## 10 30-39    Never                  68  1239   5.49 
## # ℹ 20 more rows
total_sample <- sum(trust_by_age$n)

total_sample
## [1] 7153

B. Table Creation

trust_by_age %>%
  pivot_wider(
    id_cols = AgeGroup,
    names_from = TrustPeople,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Always", "Most of the time", 
                "About half the time", "Some of the time", 
                "Never"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = "Distribution of responses (percentages)"
  ) %>%
  cols_label(
    AgeGroup = md("**Age Group**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: ANES 2020 (n = %d)", total_sample)
  ))
Interpersonal Trust by Age Group
Distribution of responses (percentages)
Age Group Always Most of the time About half the time Some of the time Never
18-29 0.8 30.8 31.9 28.2 8.3
30-39 0.8 40.5 30.5 22.7 5.5
40-49 0.7 44.1 29.1 22.9 3.2
50-59 0.2 48.9 27.1 20.8 3.1
60-69 0.7 52.4 25.2 19.8 1.9
70 or older 0.6 59.2 21.6 17.3 1.3
Data: ANES 2020 (n = 7153)

C. Data Visualization

ggplot(trust_by_age,
       aes(x = fct_rev(AgeGroup), y = percent, 
           fill = fct_rev(TrustPeople))) +
  geom_col() +
  scale_fill_viridis_d(
    option = "mako",
    name = "Trust Level"
  ) +
  coord_flip() +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    x = "Age Group",
    y = "Percentage",
    caption = sprintf("Data: ANES 2020 (n = %d)", total_sample)
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "right"
  )

D. Interpretation

Generally, to highlight that a clear age gradient emerges in interpersonal trust. For instance discussing how:

  • The oldest age group (70 or older) shows markedly higher trust levels, with 59.2% reporting they trust others “most of the time”

  • Trust levels generally increase with age, from 30.8% trusting “most of the time” among 18-29 year olds to 59.2% among those 70+

  • Strong distrust (“Never” responses) shows the opposite pattern, declining from 8.3% in the youngest group to just 1.3% in the oldest

Task 3: Views on Social Fairness

A. Data Manipulation

ess <- read_fst("All-ESS-Data.fst")
fairness_dist <- ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5  # Remove NA/DK responses
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    )
  ) %>%
  group_by(country) %>%
  count(sofrdst) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100,
    response = factor(
      case_when(
        sofrdst == 1 ~ "Agree strongly",
        sofrdst == 2 ~ "Agree",
        sofrdst == 3 ~ "Neither",
        sofrdst == 4 ~ "Disagree",
        sofrdst == 5 ~ "Disagree strongly"
      ),
      levels = c("Agree strongly", "Agree", "Neither",
                "Disagree", "Disagree strongly")
    )
  ) %>%
  ungroup()

sample_sizes <- fairness_dist %>%
  group_by(country) %>%
  summarize(n = first(total))

B. Table Creation

fairness_dist %>%
  select(country, response, percent) %>%
  pivot_wider(
    id_cols = country,
    names_from = response,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", 
                "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    country = md("**Country**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: European Social Survey (Italy n = %d, Denmark n = %d)",
            sample_sizes$n[1], sample_sizes$n[2])
  ))
Views on Fair Income Distribution
Response distribution by country (%)
Country Agree strongly Agree Neither Disagree Disagree strongly
Denmark 5.1 17.3 21.0 43.5 13.0
Italy 25.7 50.0 16.6 6.5 1.2
Data: European Social Survey (Italy n = 1548, Denmark n = 2691)

C. Visualizations

ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    )
  ) %>%
  ggplot(aes(x = sofrdst, y = country, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 0.9) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

Education Analysis Plot

fairness_edu <- ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5,
    !is.na(eisced)
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    ),
    education = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    ),
    education = factor(education, 
                      levels = c("Lower secondary or less",
                               "Upper secondary", 
                               "Tertiary"))
  )

ggplot(fairness_edu, 
       aes(x = sofrdst, y = education, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 1.2) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  facet_wrap(~country) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

D. Interpretation

Essentially to highlight the clear contrast between the two countries:

  • Italy shows strong support for income redistribution: 75.7% either agree or strongly agree

  • Denmark displays much more skepticism: 56.5% either disagree or strongly disagree

The contrast is particularly stark in “Strongly agree” responses (25.7% in Italy vs 5.1% in Denmark)

Educational Patterns:

  • In Denmark: Higher education is associated with stronger disagreement (60.2% disagree among tertiary educated vs 42.8% among lower secondary)

  • Support for equality declines with education (28.6% agree in lower education vs 21.3% in tertiary)

  • In Italy: Strong support exists across all education levels but declines with education

  • Lower educated show strongest support (81.1% agree) compared to tertiary educated (64.3%)

  • Even the most skeptical group (tertiary educated) shows much stronger support than any Danish group

Individual country load example

denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

combined_data <- bind_rows(
  denmark_data %>% mutate(country = "Denmark"),
  italy_data %>% mutate(country = "Italy")
)

fairness_dist <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5  # Remove NA/DK responses
  ) %>%
  group_by(country) %>%
  count(sofrdst) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100,
    response = factor(
      case_when(
        sofrdst == 1 ~ "Agree strongly",
        sofrdst == 2 ~ "Agree",
        sofrdst == 3 ~ "Neither",
        sofrdst == 4 ~ "Disagree",
        sofrdst == 5 ~ "Disagree strongly"
      ),
      levels = c("Agree strongly", "Agree", "Neither",
                "Disagree", "Disagree strongly")
    )
  ) %>%
  ungroup()

sample_sizes <- fairness_dist %>%
  group_by(country) %>%
  summarize(n = first(total))

fairness_table <- fairness_dist %>%
  select(country, response, percent) %>%
  pivot_wider(
    id_cols = country,
    names_from = response,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", 
                "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    country = md("**Country**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: European Social Survey (Italy n = %d, Denmark n = %d)",
            sample_sizes$n[1], sample_sizes$n[2])
  ))

density_plot <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5
  ) %>%
  ggplot(aes(x = sofrdst, y = country, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 0.9) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

fairness_edu <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5,
    !is.na(eisced)
  ) %>%
  mutate(
    education = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    ),
    education = factor(education, 
                      levels = c("Lower secondary or less",
                               "Upper secondary", 
                               "Tertiary"))
  )

education_plot <- ggplot(fairness_edu, 
       aes(x = sofrdst, y = education, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 1.2) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  facet_wrap(~country) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

fairness_table
Views on Fair Income Distribution
Response distribution by country (%)
Country Agree strongly Agree Neither Disagree Disagree strongly
Denmark 5.1 17.3 21.0 43.5 13.0
Italy 25.7 50.0 16.6 6.5 1.2
Data: European Social Survey (Italy n = 1548, Denmark n = 2691)
print(density_plot)

print(education_plot)

End