knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)

# Load required packages
packages <- c(
  "tidyverse",
  "gt",
  "gapminder",
  "srvyr",
  "srvyrexploR",
  "fst",
  "ggridges"
)

# Install if needed and load
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "srvyrexploR" "srvyr"       "gapminder"   "gt"          "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[6]]
##  [1] "fst"         "srvyrexploR" "srvyr"       "gapminder"   "gt"         
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[7]]
##  [1] "ggridges"    "fst"         "srvyrexploR" "srvyr"       "gapminder"  
##  [6] "gt"          "lubridate"   "forcats"     "stringr"     "dplyr"      
## [11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [16] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [21] "datasets"    "methods"     "base"

Task 1: Global Life Expectancy Changes

A. Data Manipulation

life_exp_summary <- gapminder %>%
  filter(year %in% c(1987, 2007)) %>%  
  group_by(continent, country) %>%
  summarize(
    start_life = first(lifeExp),
    end_life = last(lifeExp),
    change = end_life - start_life,
    avg_life = mean(lifeExp),  
    .groups = "drop"
  ) %>%
  arrange(avg_life)
life_exp_summary

## # A tibble: 142 × 6
##    continent country       start_life end_life  change avg_life
##    <fct>     <fct>              <dbl>    <dbl>   <dbl>    <dbl>
##  1 Africa    Sierra Leone        40.0     42.6  2.56       41.3
##  2 Africa    Angola              39.9     42.7  2.83       41.3
##  3 Asia      Afghanistan         40.8     43.8  3.01       42.3
##  4 Africa    Mozambique          42.9     42.1 -0.779      42.5
##  5 Africa    Guinea-Bissau       41.2     46.4  5.14       43.8
##  6 Africa    Rwanda              44.0     46.2  2.22       45.1
##  7 Africa    Liberia             46.0     45.7 -0.349      45.9
##  8 Africa    Somalia             44.5     48.2  3.66       46.3
##  9 Africa    Zambia              50.8     42.4 -8.44       46.6
## 10 Africa    Nigeria             46.9     46.9 -0.0270     46.9
## # ℹ 132 more rows

key_cases <- life_exp_summary %>%
  filter(country %in% c("Niger", "Bangladesh", 
                       "El Salvador", "Iraq", "Zimbabwe"))
print(key_cases)

## # A tibble: 5 × 6
##   continent country     start_life end_life change avg_life
##   <fct>     <fct>            <dbl>    <dbl>  <dbl>    <dbl>
## 1 Africa    Niger             44.6     56.9  12.3      50.7
## 2 Africa    Zimbabwe          62.4     43.5 -18.9      52.9
## 3 Asia      Bangladesh        52.8     64.1  11.2      58.4
## 4 Asia      Iraq              65.0     59.5  -5.50     62.3
## 5 Americas  El Salvador       63.2     71.9   8.72     67.5

B. Table Creation

life_exp_table <- gapminder %>%
  filter(year %in% c(1987, 2007)) %>%
  group_by(continent, year) %>%
  summarize(
    mean_life = mean(lifeExp),
    n = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    id_cols = continent,
    names_from = year,
    values_from = mean_life,
    names_prefix = "mean_"
  ) %>%
  mutate(change = mean_2007 - mean_1987) %>%
  arrange(desc(change))

life_exp_table %>%
  gt() %>%
  fmt_number(
    columns = c(mean_1987, mean_2007, change),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Life Expectancy Changes by Continent**"),
    subtitle = "Average life expectancy in years"
  ) %>%
  cols_label(
    continent = md("**Continent**"),
    mean_1987 = md("**1987**"),
    mean_2007 = md("**2007**"),
    change = md("**Change**")
  ) %>%
  tab_source_note(
    source_note = "Data: Gapminder"
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_options(
    table.border.top.width = 2,
    table.border.bottom.width = 2
  )

Continent	1987	2007	Change
Life Expectancy Changes by Continent
Average life expectancy in years
Asia	64.9	70.7	5.9
Americas	68.1	73.6	5.5
Oceania	75.3	80.7	5.4
Europe	73.6	77.6	4.0
Africa	53.3	54.8	1.5
Data: Gapminder

C. Data Visualization

gapminder %>%
  filter(
    year >= 1987, 
    year <= 2007,
    country %in% c("Niger", "Bangladesh", "El Salvador", 
                   "Iraq", "Zimbabwe")
  ) %>%
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_line(size = 1.5) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)",
    color = "Country"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    # Add professional polish
    axis.title = element_text(size = 10),
    axis.text = element_text(size = 9, color = "gray30"),
    panel.grid.major = element_line(color = "gray90")
  )

D. Interpretation

Generally to discuss and highlight clear regional disparity with Africa:

Asia and the Americas saw the largest gains, with increases of 5.9 and 5.5 years respectively
Despite high initial levels, Oceania continued to improve with a 5.4 year increase
Europe, starting from a high base of 73.6 years, still gained 4.0 years
Most concerningly, Africa showed minimal progress with only a 1.5 year increase, starting from and remaining at dramatically lower levels (53.3 to 54.8 years)

This pattern highlights a concerning trend: regions starting with higher life expectancy generally saw larger absolute gains, while Africa, beginning with the lowest life expectancy, experienced the smallest improvement.

For the visualization for selected countries, to highlight the different lige expectancy trajectories and notably the Niger/Zimbabwe contrast (so to showcase within-region variation despite the overall regional trend highlighted above):

For instance, discussing how: Niger shows a remarkable positive transformation, with life expectancy increasing by 12.3 years (from 44.6 to 56.9 years). This represents one of the largest improvements globally during this period, though starting from a very low base. In contrast, Zimbabwe experienced a devastating decline of 18.9 years (from 62.4 to 43.5 years), one of the steepest drops in life expectancy recorded in recent history, likely reflecting the combined impact of the HIV/AIDS epidemic and economic crisis. Bangladesh and El Salvador demonstrate steady progress from different starting points. Bangladesh achieved substantial gains of 11.2 years (from 52.8 to 64.1 years), while El Salvador saw more moderate but consistent improvement of 8.7 years (from 63.2 to 71.9 years). Iraq, however, illustrates the impact of political instability and conflict, with life expectancy declining by 5.5 years (from 65.0 to 59.5 years).

Task 2: Interpersonal Trust Across Age Groups

A. Data Manipulation

trust_by_age <- anes_2020 %>%
  filter(!is.na(TrustPeople), !is.na(AgeGroup)) %>%
  group_by(AgeGroup) %>%
  count(TrustPeople) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100
  ) %>%
  ungroup()

trust_by_age

## # A tibble: 30 × 5
##    AgeGroup TrustPeople             n total percent
##    <fct>    <fct>               <int> <int>   <dbl>
##  1 18-29    Always                  7   871   0.804
##  2 18-29    Most of the time      268   871  30.8  
##  3 18-29    About half the time   278   871  31.9  
##  4 18-29    Some of the time      246   871  28.2  
##  5 18-29    Never                  72   871   8.27 
##  6 30-39    Always                 10  1239   0.807
##  7 30-39    Most of the time      502  1239  40.5  
##  8 30-39    About half the time   378  1239  30.5  
##  9 30-39    Some of the time      281  1239  22.7  
## 10 30-39    Never                  68  1239   5.49 
## # ℹ 20 more rows

total_sample <- sum(trust_by_age$n)

total_sample

## [1] 7153

B. Table Creation

trust_by_age %>%
  pivot_wider(
    id_cols = AgeGroup,
    names_from = TrustPeople,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Always", "Most of the time", 
                "About half the time", "Some of the time", 
                "Never"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = "Distribution of responses (percentages)"
  ) %>%
  cols_label(
    AgeGroup = md("**Age Group**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: ANES 2020 (n = %d)", total_sample)
  ))

Age Group	Always	Most of the time	About half the time	Some of the time	Never
Interpersonal Trust by Age Group
Distribution of responses (percentages)
18-29	0.8	30.8	31.9	28.2	8.3
30-39	0.8	40.5	30.5	22.7	5.5
40-49	0.7	44.1	29.1	22.9	3.2
50-59	0.2	48.9	27.1	20.8	3.1
60-69	0.7	52.4	25.2	19.8	1.9
70 or older	0.6	59.2	21.6	17.3	1.3
Data: ANES 2020 (n = 7153)

C. Data Visualization

ggplot(trust_by_age,
       aes(x = fct_rev(AgeGroup), y = percent, 
           fill = fct_rev(TrustPeople))) +
  geom_col() +
  scale_fill_viridis_d(
    option = "mako",
    name = "Trust Level"
  ) +
  coord_flip() +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    x = "Age Group",
    y = "Percentage",
    caption = sprintf("Data: ANES 2020 (n = %d)", total_sample)
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "right"
  )

D. Interpretation

Generally, to highlight that a clear age gradient emerges in interpersonal trust. For instance discussing how:

The oldest age group (70 or older) shows markedly higher trust levels, with 59.2% reporting they trust others “most of the time”
Trust levels generally increase with age, from 30.8% trusting “most of the time” among 18-29 year olds to 59.2% among those 70+
Strong distrust (“Never” responses) shows the opposite pattern, declining from 8.3% in the youngest group to just 1.3% in the oldest

Task 3: Views on Social Fairness

A. Data Manipulation

ess <- read_fst("All-ESS-Data.fst")

fairness_dist <- ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5  # Remove NA/DK responses
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    )
  ) %>%
  group_by(country) %>%
  count(sofrdst) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100,
    response = factor(
      case_when(
        sofrdst == 1 ~ "Agree strongly",
        sofrdst == 2 ~ "Agree",
        sofrdst == 3 ~ "Neither",
        sofrdst == 4 ~ "Disagree",
        sofrdst == 5 ~ "Disagree strongly"
      ),
      levels = c("Agree strongly", "Agree", "Neither",
                "Disagree", "Disagree strongly")
    )
  ) %>%
  ungroup()

sample_sizes <- fairness_dist %>%
  group_by(country) %>%
  summarize(n = first(total))

B. Table Creation

fairness_dist %>%
  select(country, response, percent) %>%
  pivot_wider(
    id_cols = country,
    names_from = response,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", 
                "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    country = md("**Country**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: European Social Survey (Italy n = %d, Denmark n = %d)",
            sample_sizes$n[1], sample_sizes$n[2])
  ))

Country	Agree strongly	Agree	Neither	Disagree	Disagree strongly
Views on Fair Income Distribution
Response distribution by country (%)
Denmark	5.1	17.3	21.0	43.5	13.0
Italy	25.7	50.0	16.6	6.5	1.2
Data: European Social Survey (Italy n = 1548, Denmark n = 2691)

C. Visualizations

ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    )
  ) %>%
  ggplot(aes(x = sofrdst, y = country, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 0.9) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

Education Analysis Plot

fairness_edu <- ess %>%
  filter(
    cntry %in% c("IT", "DK"),
    !is.na(sofrdst),
    sofrdst <= 5,
    !is.na(eisced)
  ) %>%
  mutate(
    country = case_when(
      cntry == "IT" ~ "Italy",
      cntry == "DK" ~ "Denmark"
    ),
    education = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    ),
    education = factor(education, 
                      levels = c("Lower secondary or less",
                               "Upper secondary", 
                               "Tertiary"))
  )

ggplot(fairness_edu, 
       aes(x = sofrdst, y = education, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 1.2) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  facet_wrap(~country) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

D. Interpretation

Essentially to highlight the clear contrast between the two countries:

Italy shows strong support for income redistribution: 75.7% either agree or strongly agree
Denmark displays much more skepticism: 56.5% either disagree or strongly disagree

The contrast is particularly stark in “Strongly agree” responses (25.7% in Italy vs 5.1% in Denmark)

Educational Patterns:

In Denmark: Higher education is associated with stronger disagreement (60.2% disagree among tertiary educated vs 42.8% among lower secondary)
Support for equality declines with education (28.6% agree in lower education vs 21.3% in tertiary)
In Italy: Strong support exists across all education levels but declines with education
Lower educated show strongest support (81.1% agree) compared to tertiary educated (64.3%)
Even the most skeptical group (tertiary educated) shows much stronger support than any Danish group

Individual country load example

denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

combined_data <- bind_rows(
  denmark_data %>% mutate(country = "Denmark"),
  italy_data %>% mutate(country = "Italy")
)

fairness_dist <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5  # Remove NA/DK responses
  ) %>%
  group_by(country) %>%
  count(sofrdst) %>%
  mutate(
    total = sum(n),
    percent = (n / total) * 100,
    response = factor(
      case_when(
        sofrdst == 1 ~ "Agree strongly",
        sofrdst == 2 ~ "Agree",
        sofrdst == 3 ~ "Neither",
        sofrdst == 4 ~ "Disagree",
        sofrdst == 5 ~ "Disagree strongly"
      ),
      levels = c("Agree strongly", "Agree", "Neither",
                "Disagree", "Disagree strongly")
    )
  ) %>%
  ungroup()

sample_sizes <- fairness_dist %>%
  group_by(country) %>%
  summarize(n = first(total))

fairness_table <- fairness_dist %>%
  select(country, response, percent) %>%
  pivot_wider(
    id_cols = country,
    names_from = response,
    values_from = percent
  ) %>%
  gt() %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", 
                "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    country = md("**Country**")
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold")
    ),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(md(
    sprintf("Data: European Social Survey (Italy n = %d, Denmark n = %d)",
            sample_sizes$n[1], sample_sizes$n[2])
  ))

density_plot <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5
  ) %>%
  ggplot(aes(x = sofrdst, y = country, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 0.9) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank()
  )

fairness_edu <- combined_data %>%
  filter(
    !is.na(sofrdst),
    sofrdst <= 5,
    !is.na(eisced)
  ) %>%
  mutate(
    education = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    ),
    education = factor(education, 
                      levels = c("Lower secondary or less",
                               "Upper secondary", 
                               "Tertiary"))
  )

education_plot <- ggplot(fairness_edu, 
       aes(x = sofrdst, y = education, fill = country)) +
  geom_density_ridges(alpha = 0.7, scale = 1.2) +
  scale_fill_brewer(palette = "Set1") +
  scale_x_continuous(
    breaks = 1:5,
    labels = c("Strongly\nagree", "Agree", "Neither",
               "Disagree", "Strongly\ndisagree")
  ) +
  facet_wrap(~country) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none",
    panel.grid.minor = element_blank(),
    strip.text = element_text(face = "bold")
  )

fairness_table

Country	Agree strongly	Agree	Neither	Disagree	Disagree strongly
Views on Fair Income Distribution
Response distribution by country (%)
Denmark	5.1	17.3	21.0	43.5	13.0
Italy	25.7	50.0	16.6	6.5	1.2
Data: European Social Survey (Italy n = 1548, Denmark n = 2691)

print(density_plot)

print(education_plot)

SOC-3320: Skill-Building Exercise 1

Example Solution

Winter 2025

Task 1: Global Life Expectancy Changes

A. Data Manipulation

B. Table Creation

C. Data Visualization

D. Interpretation

Task 2: Interpersonal Trust Across Age Groups

A. Data Manipulation

B. Table Creation

C. Data Visualization

D. Interpretation

End