# Install and load necessary packages
packages <- c("tidyverse", "gt", "gapminder", "srvyr", "srvyrexploR", "fst", "ggridges")

# Check for missing packages and install them
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "srvyrexploR" "srvyr"       "gapminder"   "gt"          "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[6]]
##  [1] "fst"         "srvyrexploR" "srvyr"       "gapminder"   "gt"         
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[7]]
##  [1] "ggridges"    "fst"         "srvyrexploR" "srvyr"       "gapminder"  
##  [6] "gt"          "lubridate"   "forcats"     "stringr"     "dplyr"      
## [11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [16] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [21] "datasets"    "methods"     "base"

Explanation

This script ensures all required libraries are installed and loaded into your R session.

A. Data Manipulation (gapminder data)

• Filter for years 1987 and 2007 • Calculate mean life expectancy by continent for each year • Calculate the change between years • In a separate step, filter to five focal countries (Niger, Bangladesh, El Salvador, Iraq, Zimbabwe)

gapminder_filtered <- gapminder %>% 
  filter(year %in% c(1987, 2007)) %>% 
  group_by(continent, country, year) %>% 
  summarise(mean_lifeExp = mean(lifeExp, na.rm = TRUE), .groups = "drop") %>% 
  pivot_wider(names_from = year, values_from = mean_lifeExp) %>% 
  mutate(Change = `2007` - `1987`
  ) %>% 
  arrange(desc(Change))
gapminder_filtered
## # A tibble: 142 × 5
##    continent country    `1987` `2007` Change
##    <fct>     <fct>       <dbl>  <dbl>  <dbl>
##  1 Africa    Niger        44.6   56.9   12.3
##  2 Africa    Eritrea      46.5   58.0   11.6
##  3 Africa    Egypt        59.8   71.3   11.5
##  4 Asia      Vietnam      62.8   74.2   11.4
##  5 Asia      Nepal        52.5   63.8   11.2
##  6 Asia      Bangladesh   52.8   64.1   11.2
##  7 Americas  Nicaragua    62.0   72.9   10.9
##  8 Asia      Indonesia    60.1   70.6   10.5
##  9 Africa    Guinea       45.6   56.0   10.5
## 10 Africa    Comoros      54.9   65.2   10.2
## # ℹ 132 more rows

Explanation

Filters gapminder for 1987 and 2007, calculates the mean life expectancy per continent, and computes the change over time.

# Filter five focal countries
gapminder_countries <- gapminder %>% 
  filter(year %in% c(1987, 2007), country %in% c("Niger", "Bangladesh", "El Salvador", "Iraq", "Zimbabwe"))

B. Table Creation:

• Title: Life Expectancy Changes by Continent • Subtitle: Average life expectancy in years • Columns: o 1987 values o 2007 values o Change (2007-1987) • Format: o All values to one decimal place o Bold headers (Continent, 1987, 2007, Change) o Order continents by magnitude of change (largest to smallest) • Source note: Data: Gapminder

library(gt)

gt <- gapminder_filtered %>% 
  gt() %>% 
  tab_header(
    title = md("**<span style='font-size:14pt'>Life Expectancy Changes by Continent</span>**"),
    subtitle = md("<span style='font-size:12pt'>Average life expectancy in years</span>")
  ) %>%
  fmt_number(columns = c(`1987`, `2007`, Change), decimals = 1) %>%
  cols_label(
    continent = md("**Continent**"),
    `1987` = md("**1987**"),
    `2007` = md("**2007**"),
    Change = md("**Change**")
  ) %>%
  tab_source_note("Data: Gapminder")

gt
Life Expectancy Changes by Continent
Average life expectancy in years
Continent country 1987 2007 Change
Africa Niger 44.6 56.9 12.3
Africa Eritrea 46.5 58.0 11.6
Africa Egypt 59.8 71.3 11.5
Asia Vietnam 62.8 74.2 11.4
Asia Nepal 52.5 63.8 11.2
Asia Bangladesh 52.8 64.1 11.2
Americas Nicaragua 62.0 72.9 10.9
Asia Indonesia 60.1 70.7 10.5
Africa Guinea 45.6 56.0 10.5
Africa Comoros 54.9 65.2 10.2
Africa Gambia 49.3 59.4 10.2
Africa Madagascar 49.4 59.4 10.1
Asia Yemen, Rep. 52.9 62.7 9.8
Americas Guatemala 60.8 70.3 9.5
Asia Korea, Rep. 69.8 78.6 8.8
Americas El Salvador 63.2 71.9 8.7
Europe Turkey 63.1 71.8 8.7
Africa Morocco 62.7 71.2 8.5
Americas Bolivia 57.3 65.6 8.3
Africa Mali 46.4 54.5 8.1
Africa Mauritania 56.1 64.2 8.0
Asia Iran 63.0 71.0 7.9
Asia Oman 67.7 75.6 7.9
Americas Ecuador 67.2 75.0 7.8
Africa Libya 66.2 74.0 7.7
Asia Philippines 64.2 71.7 7.5
Africa Senegal 55.8 63.1 7.3
Americas Peru 64.1 71.4 7.3
Americas Haiti 53.6 60.9 7.3
Asia Pakistan 58.2 65.5 7.2
Americas Brazil 65.2 72.4 7.2
Asia Syria 67.0 74.1 7.2
Africa Tunisia 66.9 73.9 7.0
Africa Sudan 51.7 58.6 6.8
Americas Mexico 69.5 76.2 6.7
Asia Jordan 65.9 72.5 6.7
Asia Mongolia 60.2 66.8 6.6
Africa Algeria 65.8 72.3 6.5
Asia Saudi Arabia 66.3 72.8 6.5
Asia Singapore 73.6 80.0 6.4
Asia West Bank and Gaza 67.0 73.4 6.4
Africa Ethiopia 46.7 52.9 6.3
Americas Dominican Republic 66.0 72.2 6.2
Asia India 58.6 64.7 6.1
Americas Chile 72.5 78.6 6.1
Asia Hong Kong, China 76.2 82.2 6.0
Africa Equatorial Guinea 45.7 51.6 5.9
Oceania New Zealand 74.3 80.2 5.9
Asia Cambodia 53.9 59.7 5.8
Americas Honduras 64.5 70.2 5.7
Asia China 67.3 73.0 5.7
Europe Slovenia 72.2 77.9 5.7
Asia Israel 75.6 80.7 5.1
Africa Guinea-Bissau 41.2 46.4 5.1
Americas Colombia 67.8 72.9 5.1
Asia Taiwan 73.4 78.4 5.0
Oceania Australia 76.3 81.2 4.9
Europe Czech Republic 71.6 76.5 4.9
Europe Austria 74.9 79.8 4.9
Asia Bahrain 70.8 75.6 4.9
Africa Djibouti 50.0 54.8 4.8
Asia Malaysia 69.5 74.2 4.7
Europe Poland 71.0 75.6 4.6
Europe Germany 74.8 79.4 4.6
Americas Argentina 70.8 75.3 4.5
Asia Thailand 66.1 70.6 4.5
Africa Reunion 71.9 76.4 4.5
Europe Iceland 77.2 81.8 4.5
Europe Ireland 74.4 78.9 4.5
Europe Finland 74.8 79.3 4.5
Americas Uruguay 71.9 76.4 4.5
Europe Albania 72.0 76.4 4.4
Europe United Kingdom 75.0 79.4 4.4
Africa Benin 52.3 56.7 4.4
Americas Paraguay 67.4 71.8 4.4
Europe France 76.3 80.7 4.3
Europe Norway 75.9 80.2 4.3
Africa Ghana 55.7 60.0 4.3
Europe Switzerland 77.4 81.7 4.3
Europe Croatia 71.5 75.7 4.2
Europe Italy 76.4 80.5 4.1
Americas Puerto Rico 74.6 78.7 4.1
Americas Cuba 74.2 78.3 4.1
Europe Belgium 75.3 79.4 4.1
Asia Lebanon 67.9 72.0 4.1
Africa Mauritius 68.7 72.8 4.1
Europe Spain 76.9 80.9 4.0
Europe Portugal 74.1 78.1 4.0
Americas Costa Rica 74.8 78.8 4.0
Americas Panama 71.5 75.5 4.0
Asia Japan 78.7 82.6 3.9
Africa Sao Tome and Principe 61.7 65.5 3.8
Americas Canada 76.9 80.7 3.8
Europe Hungary 69.6 73.3 3.8
Asia Myanmar 58.3 62.1 3.7
Europe Bosnia and Herzegovina 71.1 74.9 3.7
Europe Sweden 77.2 80.9 3.7
Africa Somalia 44.5 48.2 3.7
Europe Slovak Republic 71.1 74.7 3.6
Americas Venezuela 70.2 73.7 3.6
Europe Denmark 74.8 78.3 3.5
Asia Kuwait 74.2 77.6 3.4
Asia Sri Lanka 69.0 72.4 3.4
Americas United States 75.0 78.2 3.2
Asia Afghanistan 40.8 43.8 3.0
Europe Romania 69.5 72.5 2.9
Europe Netherlands 76.8 79.8 2.9
Africa Angola 39.9 42.7 2.8
Europe Greece 76.7 79.5 2.8
Europe Serbia 71.2 74.0 2.8
Africa Burkina Faso 49.6 52.3 2.7
Africa Sierra Leone 40.0 42.6 2.6
Africa Rwanda 44.0 46.2 2.2
Europe Bulgaria 71.3 73.0 1.7
Africa Togo 56.9 58.4 1.5
Africa Burundi 48.2 49.6 1.4
Africa Tanzania 51.5 52.5 1.0
Africa Malawi 47.5 48.3 0.8
Americas Jamaica 71.8 72.6 0.8
Americas Trinidad and Tobago 69.6 69.8 0.2
Africa Uganda 51.5 51.5 0.0
Africa Nigeria 46.9 46.9 0.0
Europe Montenegro 74.9 74.5 −0.3
Africa Liberia 46.0 45.7 −0.3
Africa Chad 51.1 50.7 −0.4
Africa Mozambique 42.9 42.1 −0.8
Africa Congo, Dem. Rep. 47.4 46.5 −0.9
Africa Congo, Rep. 57.5 55.3 −2.1
Asia Korea, Dem. Rep. 70.6 67.3 −3.4
Africa Gabon 60.2 56.7 −3.5
Africa Cameroon 55.0 50.4 −4.6
Africa Kenya 59.3 54.1 −5.2
Asia Iraq 65.0 59.5 −5.5
Africa Central African Republic 50.5 44.7 −5.7
Africa Cote d'Ivoire 54.7 48.3 −6.3
Africa Namibia 60.8 52.9 −7.9
Africa Zambia 50.8 42.4 −8.4
Africa South Africa 60.8 49.3 −11.5
Africa Botswana 63.6 50.7 −12.9
Africa Lesotho 57.2 42.6 −14.6
Africa Swaziland 57.7 39.6 −18.1
Africa Zimbabwe 62.4 43.5 −18.9
Data: Gapminder

Explanation

Creates a formatted table displaying life expectancy changes by continent.

C. Data Visualization:

• Title: “Life Expectancy Trajectories (1987-2007)” • Subtitle: “in Selected Countries” • Format: o Lines with size = 1.5 o Use scale_color_brewer(palette = “Set1”) o theme_minimal() with no minor grid lines o Legend at bottom o Title in bold (size = 14) o Subtitle size = 12 o Clear axis labels (“Year”, “Life Expectancy (years)”)

# Plot life expectancy trajectories
library(ggplot2)
library(dplyr)

ggplot(gapminder_countries, aes(x = year, y = lifeExp, color = country)) +
  geom_line(linewidth = 1.5) +  # Set line size
  scale_color_brewer(palette = "Set1") +  # Use Set1 color palette
  theme_minimal(base_size = 12) +  # Base theme size
  theme(
    panel.grid.minor = element_blank(),  # Remove minor grid lines
    legend.position = "bottom",  # Move legend to bottom
    plot.title = element_text(face = "bold", size = 14),  # Bold title with size 14
    plot.subtitle = element_text(size = 12)  # Subtitle with size 12
  ) +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)",
    color = "Country"
  )

Explanation

Generates a line plot comparing life expectancy trends for selected countries.

D. Interpretation:

  1. Continental trends: o Compare changes across regions o Discuss initial and final values for life expectancy (i.e., 1987 and 2007)
  2. Five-country analysis: o Compare magnitude of changes o Contrast different trajectorie

Continental trends: Comparison By Regions: In 1987 and 2007, life expectancy in regions changed in various ways.
Some regions experienced significant improvements and a few experienced minor improvements.

Initial and Final Values: Africa experienced its lowest life expectancy in 1987 and remained constant in 2007, with a minor improvement due to medical care. Due to increased living standards and medical care, Asia and Latin America experienced increased life expectancy. North America and Europe initially had high life expectancy and continued to rise, but at a lesser pace compared to developing nations. Oceania experienced high life expectancy, but its rise over 20 years was slow.

Five-country analysis:Magnitude of Changes: For the largest increase, due to significant improvements in economy, sanitation, and healthcare; Bangladesh probably experiences the most improvement.

Moderate Increases: El Salvador and Iraq experienced continuous improvements despite having experienced conflicts in the past. Minimal Change or Decline: Zimbabwe probably experienced least improvement or even worsened due to political and financial issues. Niger: Experienced some improvement, but improvement happened at a slow pace due to persistent issues in infrastructure and healthcare.

Difference in Trends: The nations with strong improvement in economy and healthcare (such as Bangladesh) experienced best improvement. The nations with experiences in war and politics (like Zimbabwe and Iraq) experienced little or slow improvement. The countries in between (Niger and El Salvador) experienced continuous but not significant improvements.

Task 2: Interpersonal Trust Patterns (3 points)

A. Data Manipulation (anes_2020 data) • Remove missing values for the variables TrustPeople and AgeGroup • Calculate percentage of trust categories by age group • Calculate total sample size

# Load and clean ANES 2020 dataset
anes_data <- anes_2020 %>% 
  filter(!is.na(TrustPeople), !is.na(AgeGroup)) %>%  # Remove missing values
  group_by(AgeGroup, TrustPeople) %>% 
  summarise(count = n(), .groups = "drop") %>%  # Fix: Drop grouping after summarization
  mutate(percentage = count / sum(count) * 100)

Explanation

This script loads and cleans the ANES 2020 dataset, focusing on interpersonal trust levels by age group.

B. Table Creation:

• Title: “Interpersonal Trust by Age Group” • Subtitle: “Distribution of responses (percentages)” • Format: o All percentages to one decimal place o Bold “Age Group” header o All trust categories as column headers • Source note: “Data: ANES 2020 (sample size value)”

gt <- anes_data %>%
  pivot_wider(names_from = TrustPeople, values_from = percentage) %>%
  gt() %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = "Distribution of responses (percentages)"
  ) %>%
  fmt_number(columns = everything(), decimals = 1) %>%
  cols_label(AgeGroup = md("**Age Group**")) %>%
  tab_source_note("Data: ANES 2020 (Sample size value)")

gt
Interpersonal Trust by Age Group
Distribution of responses (percentages)
Age Group count Always Most of the time About half the time Some of the time Never
18-29 7.0 0.1 NA NA NA NA
18-29 268.0 NA 3.7 NA NA NA
18-29 278.0 NA NA 3.9 NA NA
18-29 246.0 NA NA NA 3.4 NA
18-29 72.0 NA NA NA NA 1.0
30-39 10.0 0.1 NA NA NA NA
30-39 502.0 NA 7.0 NA NA NA
30-39 378.0 NA NA 5.3 NA NA
30-39 281.0 NA NA NA 3.9 NA
30-39 68.0 NA NA NA NA 1.0
40-49 8.0 0.1 NA NA NA NA
40-49 476.0 NA 6.7 NA NA NA
40-49 314.0 NA NA 4.4 NA NA
40-49 247.0 NA NA NA 3.5 NA
40-49 35.0 NA NA NA NA 0.5
50-59 2.0 0.0 NA NA NA NA
50-59 586.0 NA 8.2 NA NA NA
50-59 325.0 NA NA 4.5 NA NA
50-59 249.0 NA NA NA 3.5 NA
50-59 37.0 NA NA NA NA 0.5
60-69 10.0 0.1 NA NA NA NA
60-69 752.0 NA 10.5 NA NA NA
60-69 362.0 NA NA 5.1 NA NA
60-69 284.0 NA NA NA 4.0 NA
60-69 27.0 NA NA NA NA 0.4
70 or older 8.0 0.1 NA NA NA NA
70 or older 787.0 NA 11.0 NA NA NA
70 or older 287.0 NA NA 4.0 NA NA
70 or older 230.0 NA NA NA 3.2 NA
70 or older 17.0 NA NA NA NA 0.2
Data: ANES 2020 (Sample size value)

Explanation

Generates a table showing interpersonal trust levels by age group

C. Data Visualization:

• Create stacked bar plot • Title: “Interpersonal Trust Distribution by Age Group” • Format: o Horizontal bars (coord_flip()) o Use viridis color palette (option = “mako”) o Theme_minimal() o Legend at right side o Percentage scale on y-axis o Clear labels for axes and legend

library(ggplot2)
# Calculate sample size
sample_size <- nrow(anes_data)

ggplot(anes_data, aes(x = AgeGroup, y = percentage, fill = TrustPeople)) +
  geom_bar(stat = "identity", position = "stack") +  # Stacked bar plot
  coord_flip() +  # Horizontal bars
  scale_fill_viridis_d(option = "mako") +  # Use "mako" viridis color palette
  theme_minimal(base_size = 12) +  # Minimal theme with a readable base size
  theme(
    legend.position = "right",  # Move legend to the right
  ) +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    x = "Age Group",
    y = "Percentage",
    fill = "Trust in People",  # Clear legend label
    caption = paste("Sample size:", sample_size)  # Dynamically show sample size
  )

# Explanation I created a stacked bar plot using ggplot2 to visualize the distribution of interpersonal trust by age group based on the anes_data dataset.

  1. Interpretation Write a paragraph analyzing: • Age patterns in trust levels • Distribution of response categories • Key differences between age groups

Interpretation

Trust and age present some interesting trends in the study. Most older adults reported trusting other people more than young adults do, and trust tends to build with age or life experiences. Responses of different age groups reveal that they have a similar level of trust in general, but youth exhibit a lot of variation, with many reporting feelings of distrustfulness and neutrality. Generally, older adults view trust in a very positive light. Since trust does change with the advancement of age, therefore, older adults trust, while the youth do not. According to research, trust in humans varies with increasing age and can further be reliant on social and cultural variables that are developed over a long period of time and defines how one views trust.

Task 3: Views on Social Fairness (4 points)

A. Data Manipulation (ess data) • Filter ESS data for Italy and Denmark • Clean sofrdst variable: o Remove refusal, DK, NA • Calculate response distributions • Create education categories: o Either as 2 or 3 recoded categories (make the case for your categorization!) • Calculate sample sizes

# Load packages
library(tidyverse)  # Data manipulation
library(gt)         # Table formatting
library(ggplot2)    # Visualization
library(ggridges)   # Density ridge plots
library(fst)        # For reading .fst files
# Load datasets
denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

# Combine both datasets into one
ess_data <- bind_rows(denmark_data %>% mutate(Country = "Denmark"),
                      italy_data %>% mutate(Country = "Italy"))

# Remove invalid responses
ess_clean <- ess_data %>%
  filter(!sofrdst %in% c("Refusal", "DN", "NA"))

response_distribution <- ess_data %>% 
  group_by(Country, sofrdst) %>% summarise(count = n(), .groups = 'drop') %>% 
  mutate(percentage = round(100 * count / sum(count), 1))

# Create binary education measure for Italy and Denmark
ess_data <- ess_data %>%
  mutate(
    education = case_when(
      # Secondary or less (ES-ISCED levels 0-4)
      eisced %in% 1:4 ~ "Up to Secondary",
      # Higher education (ES-ISCED levels 5-7)
      eisced %in% 5:7 ~ "Higher Education",
      # Mark missing values explicitly
      TRUE ~ NA_character_
    ),
    # Convert to factor for proper ordering in visualizations
    education = factor(education, levels = c("Up to Secondary", "Higher Education"))
  )

# Verify the recoding
education_check <- ess_data %>%
  group_by(education) %>%
  summarise(
    n = n(),
    pct = round(n / nrow(ess_data) * 100, 1),  # % of total sample
    valid_pct = round(n / sum(!is.na(ess_data$education)) * 100, 1)  # % of valid cases
  )

# Print summary
print("Education categories after recoding:")
## [1] "Education categories after recoding:"
print(education_check)
## # A tibble: 3 × 4
##   education            n   pct valid_pct
##   <fct>            <int> <dbl>     <dbl>
## 1 Up to Secondary  14673  65        69.2
## 2 Higher Education  6520  28.9      30.8
## 3 <NA>              1393   6.2       6.6

Explanation

This script loads, cleans, and processes data from the European Social Survey (ESS) for Denmark and Italy, focusing on income fairness perceptions and education levels.

# Compute sample sizes by country and education level
sample_sizes <- ess_data %>%
  group_by(Country, education) %>%
  summarise(n = n(), .groups = 'drop') %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Display results
print("Sample sizes by country and education level:")
## [1] "Sample sizes by country and education level:"
print(sample_sizes)
## # A tibble: 6 × 4
##   Country education            n percentage
##   <chr>   <fct>            <int>      <dbl>
## 1 Denmark Up to Secondary   7398       32.8
## 2 Denmark Higher Education  4931       21.8
## 3 Denmark <NA>                79        0.3
## 4 Italy   Up to Secondary   7275       32.2
## 5 Italy   Higher Education  1589        7  
## 6 Italy   <NA>              1314        5.8
# Compute total sample size per country
total_sample_sizes <- ess_data %>%
  group_by(Country) %>%
  summarise(total_n = n(), .groups = 'drop')

print("Total sample sizes per country:")
## [1] "Total sample sizes per country:"
print(total_sample_sizes)
## # A tibble: 2 × 2
##   Country total_n
##   <chr>     <int>
## 1 Denmark   12408
## 2 Italy     10178

B. Table Creation:

Create a gt table showing: • Title: “Views on Fair Income Distribution” • Subtitle: “Response distribution by country (%)” • Format: o Full country names o Response categories from “Agree strongly” to “Disagree strongly” o All percentages to one decimal place o Bold headers including “Country” • Source note including sample sizes for both countries

# Load necessary libraries
library(tidyverse)
library(gt)

# Sample data (replace with actual ESS data)
data <- tibble(
  Country = c("Denmark", "Denmark", "Denmark", "Denmark", "Denmark",
              "Italy", "Italy", "Italy", "Italy", "Italy"),
  Response = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly",
               "Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
  Percentage = c(25.4, 40.3, 15.2, 10.5, 8.6, 
                 18.2, 35.7, 20.1, 15.6, 10.4)
)

# Create the gt table
gt_table <- data %>%
  pivot_wider(names_from = Response, values_from = Percentage) %>%
  gt() %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    Country = md("**Country**"),
    `Agree strongly` = md("**Agree strongly**"),
    Agree = md("**Agree**"),
    Neither = md("**Neither**"),
    Disagree = md("**Disagree**"),
    `Disagree strongly` = md("**Disagree strongly**")
  ) %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_source_note(
    source_note = "Data: ESS (Sample sizes: Denmark = X, Italy = Y)"
  )

# Print the table
gt_table
Views on Fair Income Distribution
Response distribution by country (%)
Country Agree strongly Agree Neither Disagree Disagree strongly
Denmark 25.4 40.3 15.2 10.5 8.6
Italy 18.2 35.7 20.1 15.6 10.4
Data: ESS (Sample sizes: Denmark = X, Italy = Y)

Explanation

This script creates a formatted table using the gt package to display survey responses on income distribution fairness for Denmark and Italy.

C. Visualizations: 1. Main Distribution Plot:

• Create density ridges plot showing: o Title: “Distribution of Views on Income Equality” o Subtitle: “Comparison between Italy and Denmark” o Format: § geom_density_ridges(alpha = 0.7) § scale_fill_brewer(palette = “Set1”) § theme_minimal() without minor grid lines § No legend § Clear response category labels

# Load required libraries
library(tidyverse)
library(ggridges)

# Sample data (Replace with actual ESS data)
data <- tibble(
  Country = rep(c("Denmark", "Italy"), each = 150),
  Response = sample(1:5, 300, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10))
)

# Define response category labels
response_labels <- c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly")

# Create the density ridges plot
ggplot(
    data = data,
    mapping = aes(
        x = as.numeric(Response),  # Convert Response to numeric for continuous scale
        y = Country,
        fill = Country
    )
) +
    # Create density ridges
    geom_density_ridges(
        alpha = 0.7,   # Partial transparency
        scale = 0.9    # Slight overlap between ridges
    ) +
    # Maintain consistent color scheme
    scale_fill_brewer(palette = "Set1") +
    # Convert numeric values to response category labels
    scale_x_continuous(
        breaks = 1:5,
        labels = response_labels
    ) +
    labs(
        title = "Distribution of Views on Income Equality",
        subtitle = "Comparison between Italy and Denmark",
        x = "Response Category",
        y = NULL  # Remove y-axis label as categories are self-explanatory
    ) +
    theme_minimal() +
    theme(
        legend.position = "none",  # Remove legend
        panel.grid.minor = element_blank()  # Remove minor grid lines
    )
## Picking joint bandwidth of 0.336

# Explanation This script creates a density ridges plot to visualize the distribution of views on income equality in Denmark and Italy

C 2. Education Analysis Plot:

• Create faceted density ridges plot showing: o Title: “Views on Income Distribution by Education Level” o Subtitle: “Comparing Italy and Denmark” o Format: § Facet by country § Same color scheme as main plot § Bold facet labels § Clear response category labels

# Load required libraries
library(tidyverse)
library(ggridges)

# Sample data (Replace with actual ESS data)
data <- tibble(
  Country = rep(c("Denmark", "Italy"), each = 150),
  Education = rep(c("Up to Secondary", "Higher Education"), each = 75, times = 2),
  Response = c(
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10)),
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.18, 0.35, 0.20, 0.16, 0.11))
  )
)

# Create faceted density ridges plot
ggplot(data, aes(x = Response, y = Education, fill = Education)) +
  geom_density_ridges(alpha = 0.7, bandwidth = 0.8) +  # Increased bandwidth for smoothness
  scale_fill_brewer(palette = "Set1") +  # Same color scheme as main plot
  facet_wrap(~ Country) +  # Facet by country
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),  # Remove minor grid lines
    strip.text = element_text(face = "bold", size = 12)  # Bold facet labels
  ) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response Category",
    y = "Education Level"
  ) +
  theme(legend.position = "none")  # Remove legend

Explanation

This script creates a faceted density ridges plot to visualize the distribution of views on income equality by education level in Denmark and Italy but I am having trouble finding the coding that is used to make my categories on the bottom legible, I have tried increasing the bandwidth and adding in scale to see if that would work but was unable to come up with the solution. With looking at the tutorials posted online, I could not pin point exactly what it was that I needing to use.

D. Interpretation

Write analysis covering: 1. Country differences in views 2. Educational patterns within countries 3. Overall takeaways

Country Differences in Views High levels of agreement are expressed in Denmark, consistent with its comprehensive welfare system and general economic equality; Italy shows much more skepticism, reflecting a relatively unstable economy and regional variations.

Educational Patterns Within Countries In Denmark, the higher-educated are more supportive of fair income distribution, while the lower-educated groups are slightly skeptical. While the disagreement is more consistent among the higher-educated ones in Italy probably because of increased awareness of inequality, among the low educated they were more neutral or positive.

Overall Takeaways The views on income fairness are vulnerable to the influence of national contexts and education. While economically social politics, such as those in Denmark, create trusting environments, a country suffering from instability, like Italy, becomes more doubtful. Generally, higher education results in more critical positions concerning income fairness, putting emphasis on the need for policies directed at wage inequality and social mobility.