# Install and load necessary packages 
packages <- c("tidyverse", "gt", "gapminder", "srvyr", "srvyrexploR", "fst", "ggridges")

# Check for missing packages and install them
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "srvyrexploR" "srvyr"       "gapminder"   "gt"          "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[6]]
##  [1] "fst"         "srvyrexploR" "srvyr"       "gapminder"   "gt"         
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[7]]
##  [1] "ggridges"    "fst"         "srvyrexploR" "srvyr"       "gapminder"  
##  [6] "gt"          "lubridate"   "forcats"     "stringr"     "dplyr"      
## [11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [16] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [21] "datasets"    "methods"     "base"

Task 1: Global Life Expectancy Changes

Data Manipulation (gapminder data) • Filter for years 1987 and 2007 • Calculate mean life expectancy by continent for each year • Calculate the change between years • In a separate step, filter to five focal countries (Niger, Bangladesh, El Salvador, Iraq, Zimbabwe)

gapminder_filtered <- gapminder %>% 
  filter(year %in% c(1987, 2007)) %>% 
  group_by(continent, country, year) %>% 
  summarise(mean_lifeExp = mean(lifeExp, na.rm = TRUE), .groups = "drop") %>% 
  pivot_wider(names_from = year, values_from = mean_lifeExp) %>% 
  mutate(Change = `2007` - `1987`
  ) %>% 
  arrange(desc(Change))
gapminder_filtered

## # A tibble: 142 × 5
##    continent country    `1987` `2007` Change
##    <fct>     <fct>       <dbl>  <dbl>  <dbl>
##  1 Africa    Niger        44.6   56.9   12.3
##  2 Africa    Eritrea      46.5   58.0   11.6
##  3 Africa    Egypt        59.8   71.3   11.5
##  4 Asia      Vietnam      62.8   74.2   11.4
##  5 Asia      Nepal        52.5   63.8   11.2
##  6 Asia      Bangladesh   52.8   64.1   11.2
##  7 Americas  Nicaragua    62.0   72.9   10.9
##  8 Asia      Indonesia    60.1   70.6   10.5
##  9 Africa    Guinea       45.6   56.0   10.5
## 10 Africa    Comoros      54.9   65.2   10.2
## # ℹ 132 more rows

• In a separate step, filter to five focal countries (Niger, Bangladesh, El Salvador, Iraq, Zimbabwe)

# Filter five focal countries
gapminder_countries <- gapminder %>% 
  filter(year %in% c(1987, 2007), country %in% c("Niger", "Bangladesh", "El Salvador", "Iraq", "Zimbabwe"))

Table Creation: • Title: Life Expectancy Changes by Continent • Subtitle: Average life expectancy in years • Columns: o 1987 values o 2007 values o Change (2007-1987)

library(gt)

gt_table <- gapminder_filtered %>% 
  gt() %>% 
  tab_header(
    title = md("**<span style='font-size:14pt'>Life Expectancy Changes by Continent</span>**"),
    subtitle = md("<span style='font-size:12pt'>Average life expectancy in years</span>")
  ) %>%
  fmt_number(columns = c(`1987`, `2007`, Change), decimals = 1) %>%
  cols_label(
    continent = md("**Continent**"),
    `1987` = md("**1987**"),
    `2007` = md("**2007**"),
    Change = md("**Change**")
  ) %>%
  tab_source_note("Data: Gapminder")

gt_table

Continent	country	1987	2007	Change
Life Expectancy Changes by Continent
Average life expectancy in years
Africa	Niger	44.6	56.9	12.3
Africa	Eritrea	46.5	58.0	11.6
Africa	Egypt	59.8	71.3	11.5
Asia	Vietnam	62.8	74.2	11.4
Asia	Nepal	52.5	63.8	11.2
Asia	Bangladesh	52.8	64.1	11.2
Americas	Nicaragua	62.0	72.9	10.9
Asia	Indonesia	60.1	70.7	10.5
Africa	Guinea	45.6	56.0	10.5
Africa	Comoros	54.9	65.2	10.2
Africa	Gambia	49.3	59.4	10.2
Africa	Madagascar	49.4	59.4	10.1
Asia	Yemen, Rep.	52.9	62.7	9.8
Americas	Guatemala	60.8	70.3	9.5
Asia	Korea, Rep.	69.8	78.6	8.8
Americas	El Salvador	63.2	71.9	8.7
Europe	Turkey	63.1	71.8	8.7
Africa	Morocco	62.7	71.2	8.5
Americas	Bolivia	57.3	65.6	8.3
Africa	Mali	46.4	54.5	8.1
Africa	Mauritania	56.1	64.2	8.0
Asia	Iran	63.0	71.0	7.9
Asia	Oman	67.7	75.6	7.9
Americas	Ecuador	67.2	75.0	7.8
Africa	Libya	66.2	74.0	7.7
Asia	Philippines	64.2	71.7	7.5
Africa	Senegal	55.8	63.1	7.3
Americas	Peru	64.1	71.4	7.3
Americas	Haiti	53.6	60.9	7.3
Asia	Pakistan	58.2	65.5	7.2
Americas	Brazil	65.2	72.4	7.2
Asia	Syria	67.0	74.1	7.2
Africa	Tunisia	66.9	73.9	7.0
Africa	Sudan	51.7	58.6	6.8
Americas	Mexico	69.5	76.2	6.7
Asia	Jordan	65.9	72.5	6.7
Asia	Mongolia	60.2	66.8	6.6
Africa	Algeria	65.8	72.3	6.5
Asia	Saudi Arabia	66.3	72.8	6.5
Asia	Singapore	73.6	80.0	6.4
Asia	West Bank and Gaza	67.0	73.4	6.4
Africa	Ethiopia	46.7	52.9	6.3
Americas	Dominican Republic	66.0	72.2	6.2
Asia	India	58.6	64.7	6.1
Americas	Chile	72.5	78.6	6.1
Asia	Hong Kong, China	76.2	82.2	6.0
Africa	Equatorial Guinea	45.7	51.6	5.9
Oceania	New Zealand	74.3	80.2	5.9
Asia	Cambodia	53.9	59.7	5.8
Americas	Honduras	64.5	70.2	5.7
Asia	China	67.3	73.0	5.7
Europe	Slovenia	72.2	77.9	5.7
Asia	Israel	75.6	80.7	5.1
Africa	Guinea-Bissau	41.2	46.4	5.1
Americas	Colombia	67.8	72.9	5.1
Asia	Taiwan	73.4	78.4	5.0
Oceania	Australia	76.3	81.2	4.9
Europe	Czech Republic	71.6	76.5	4.9
Europe	Austria	74.9	79.8	4.9
Asia	Bahrain	70.8	75.6	4.9
Africa	Djibouti	50.0	54.8	4.8
Asia	Malaysia	69.5	74.2	4.7
Europe	Poland	71.0	75.6	4.6
Europe	Germany	74.8	79.4	4.6
Americas	Argentina	70.8	75.3	4.5
Asia	Thailand	66.1	70.6	4.5
Africa	Reunion	71.9	76.4	4.5
Europe	Iceland	77.2	81.8	4.5
Europe	Ireland	74.4	78.9	4.5
Europe	Finland	74.8	79.3	4.5
Americas	Uruguay	71.9	76.4	4.5
Europe	Albania	72.0	76.4	4.4
Europe	United Kingdom	75.0	79.4	4.4
Africa	Benin	52.3	56.7	4.4
Americas	Paraguay	67.4	71.8	4.4
Europe	France	76.3	80.7	4.3
Europe	Norway	75.9	80.2	4.3
Africa	Ghana	55.7	60.0	4.3
Europe	Switzerland	77.4	81.7	4.3
Europe	Croatia	71.5	75.7	4.2
Europe	Italy	76.4	80.5	4.1
Americas	Puerto Rico	74.6	78.7	4.1
Americas	Cuba	74.2	78.3	4.1
Europe	Belgium	75.3	79.4	4.1
Asia	Lebanon	67.9	72.0	4.1
Africa	Mauritius	68.7	72.8	4.1
Europe	Spain	76.9	80.9	4.0
Europe	Portugal	74.1	78.1	4.0
Americas	Costa Rica	74.8	78.8	4.0
Americas	Panama	71.5	75.5	4.0
Asia	Japan	78.7	82.6	3.9
Africa	Sao Tome and Principe	61.7	65.5	3.8
Americas	Canada	76.9	80.7	3.8
Europe	Hungary	69.6	73.3	3.8
Asia	Myanmar	58.3	62.1	3.7
Europe	Bosnia and Herzegovina	71.1	74.9	3.7
Europe	Sweden	77.2	80.9	3.7
Africa	Somalia	44.5	48.2	3.7
Europe	Slovak Republic	71.1	74.7	3.6
Americas	Venezuela	70.2	73.7	3.6
Europe	Denmark	74.8	78.3	3.5
Asia	Kuwait	74.2	77.6	3.4
Asia	Sri Lanka	69.0	72.4	3.4
Americas	United States	75.0	78.2	3.2
Asia	Afghanistan	40.8	43.8	3.0
Europe	Romania	69.5	72.5	2.9
Europe	Netherlands	76.8	79.8	2.9
Africa	Angola	39.9	42.7	2.8
Europe	Greece	76.7	79.5	2.8
Europe	Serbia	71.2	74.0	2.8
Africa	Burkina Faso	49.6	52.3	2.7
Africa	Sierra Leone	40.0	42.6	2.6
Africa	Rwanda	44.0	46.2	2.2
Europe	Bulgaria	71.3	73.0	1.7
Africa	Togo	56.9	58.4	1.5
Africa	Burundi	48.2	49.6	1.4
Africa	Tanzania	51.5	52.5	1.0
Africa	Malawi	47.5	48.3	0.8
Americas	Jamaica	71.8	72.6	0.8
Americas	Trinidad and Tobago	69.6	69.8	0.2
Africa	Uganda	51.5	51.5	0.0
Africa	Nigeria	46.9	46.9	0.0
Europe	Montenegro	74.9	74.5	−0.3
Africa	Liberia	46.0	45.7	−0.3
Africa	Chad	51.1	50.7	−0.4
Africa	Mozambique	42.9	42.1	−0.8
Africa	Congo, Dem. Rep.	47.4	46.5	−0.9
Africa	Congo, Rep.	57.5	55.3	−2.1
Asia	Korea, Dem. Rep.	70.6	67.3	−3.4
Africa	Gabon	60.2	56.7	−3.5
Africa	Cameroon	55.0	50.4	−4.6
Africa	Kenya	59.3	54.1	−5.2
Asia	Iraq	65.0	59.5	−5.5
Africa	Central African Republic	50.5	44.7	−5.7
Africa	Cote d'Ivoire	54.7	48.3	−6.3
Africa	Namibia	60.8	52.9	−7.9
Africa	Zambia	50.8	42.4	−8.4
Africa	South Africa	60.8	49.3	−11.5
Africa	Botswana	63.6	50.7	−12.9
Africa	Lesotho	57.2	42.6	−14.6
Africa	Swaziland	57.7	39.6	−18.1
Africa	Zimbabwe	62.4	43.5	−18.9
Data: Gapminder

Data Visualization: • Title: “Life Expectancy Trajectories (1987-2007)” • Subtitle: “in Selected Countries” • Format: o Lines with size = 1.5 o Use scale_color_brewer(palette = “Set1”) o theme_minimal() with no minor grid lines o Legend at bottom o Title in bold (size = 14) o Subtitle size = 12 o Clear axis labels (“Year”, “Life Expectancy (years)”)

library(ggplot2)
library(dplyr)

ggplot(gapminder_countries, aes(x = year, y = lifeExp, color = country)) +
  geom_line(size = 1.5) +  # Set line size
  scale_color_brewer(palette = "Set1") +  # Use Set1 color palette
  theme_minimal(base_size = 12) +  # Base theme size
  theme(
    panel.grid.minor = element_blank(),  # Remove minor grid lines
    legend.position = "bottom",  # Move legend to bottom
    plot.title = element_text(face = "bold", size = 14),  # Bold title with size 14
    plot.subtitle = element_text(size = 12)  # Subtitle with size 12
  ) +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)",
    color = "Country"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Task 2: Interpersonal Trust Patterns (3 points)

Data Manipulation (anes_2020 data) • Remove missing values for the variables TrustPeople and AgeGroup • Calculate percentage of trust categories by age group • Calculate total sample size

anes_data <- anes_2020 %>% 
  filter(!is.na(TrustPeople), !is.na(AgeGroup)) %>%  # Remove missing values
  group_by(AgeGroup, TrustPeople) %>% 
  summarise(count = n(), .groups = "drop") %>%  # Fix: Drop grouping after summarization
  mutate(percentage = count / sum(count) * 100)

Table Creation: • Title: “Interpersonal Trust by Age Group” • Subtitle: “Distribution of responses (percentages)” • Format: o All percentages to one decimal place o Bold “Age Group” header o All trust categories as column headers • Source note: “Data: ANES 2020 (sample size value)”

gt_anes <- anes_data %>%
  pivot_wider(names_from = TrustPeople, values_from = percentage) %>%
  gt() %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = "Distribution of responses (percentages)"
  ) %>%
  fmt_number(columns = everything(), decimals = 1) %>%
  cols_label(AgeGroup = md("**Age Group**")) %>%
  tab_source_note("Data: ANES 2020 (Sample size value)")

gt_anes

Age Group	count	Always	Most of the time	About half the time	Some of the time	Never
Interpersonal Trust by Age Group
Distribution of responses (percentages)
18-29	7.0	0.1	NA	NA	NA	NA
18-29	268.0	NA	3.7	NA	NA	NA
18-29	278.0	NA	NA	3.9	NA	NA
18-29	246.0	NA	NA	NA	3.4	NA
18-29	72.0	NA	NA	NA	NA	1.0
30-39	10.0	0.1	NA	NA	NA	NA
30-39	502.0	NA	7.0	NA	NA	NA
30-39	378.0	NA	NA	5.3	NA	NA
30-39	281.0	NA	NA	NA	3.9	NA
30-39	68.0	NA	NA	NA	NA	1.0
40-49	8.0	0.1	NA	NA	NA	NA
40-49	476.0	NA	6.7	NA	NA	NA
40-49	314.0	NA	NA	4.4	NA	NA
40-49	247.0	NA	NA	NA	3.5	NA
40-49	35.0	NA	NA	NA	NA	0.5
50-59	2.0	0.0	NA	NA	NA	NA
50-59	586.0	NA	8.2	NA	NA	NA
50-59	325.0	NA	NA	4.5	NA	NA
50-59	249.0	NA	NA	NA	3.5	NA
50-59	37.0	NA	NA	NA	NA	0.5
60-69	10.0	0.1	NA	NA	NA	NA
60-69	752.0	NA	10.5	NA	NA	NA
60-69	362.0	NA	NA	5.1	NA	NA
60-69	284.0	NA	NA	NA	4.0	NA
60-69	27.0	NA	NA	NA	NA	0.4
70 or older	8.0	0.1	NA	NA	NA	NA
70 or older	787.0	NA	11.0	NA	NA	NA
70 or older	287.0	NA	NA	4.0	NA	NA
70 or older	230.0	NA	NA	NA	3.2	NA
70 or older	17.0	NA	NA	NA	NA	0.2
Data: ANES 2020 (Sample size value)

Data Visualization: • Create stacked bar plot • Title: “Interpersonal Trust Distribution by Age Group” • Format: o Horizontal bars (coord_flip()) o Use viridis color palette (option = “mako”) o Theme_minimal() o Legend at right side o Percentage scale on y-axis o Clear labels for axes and legend o Caption showing sample size

# Calculate sample size
sample_size <- nrow(anes_data)

ggplot(anes_data, aes(x = AgeGroup, y = percentage, fill = TrustPeople)) +
  geom_bar(stat = "identity", position = "stack") +  # Stacked bar plot
  coord_flip() +  # Horizontal bars
  scale_fill_viridis_d(option = "mako") +  # Use "mako" viridis color palette
  theme_minimal(base_size = 12) +  # Minimal theme with a readable base size
  theme(
    legend.position = "right",  # Move legend to the right
  ) +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    x = "Age Group",
    y = "Percentage",
    fill = "Trust in People",  # Clear legend label
    caption = paste("Sample size:", sample_size)  # Dynamically show sample size
  )

Interpretation Write a paragraph analyzing: • Age patterns in trust levels • Distribution of response categories • Key differences between age groups

The study of trust between different age groups can be seen as interesting. Older adults, from ages 50-70 and older, appear to be more trusting in others than the younger adults. As an individual gets older, trust for others become more biddable due to life experience and maturity. Responses between age groups appear to have similar levels of trust, but the youth show more contrast in distrust. The data shows older adults are reporting more under ‘Most of the time’ for trust in others. This could be because older participants have a more positive outlook of others and trusting in someone could come naturally. Trust varies with age. Older adults trust more in others, but youth does not.

Task 3: Views on Social Fairness (4 points)

Data Manipulation (ess data) • Filter ESS data for Italy and Denmark • Clean sofrdst variable: o Remove refusal, DK, NA • Calculate response distributions • Create education categories: o Either as 2 or 3 recoded categories (make the case for your categorization!) • Calculate sample sizes

# Load packages
library(tidyverse) # Data manipulation
library(gt) # Table formatting
library(ggplot2) # Visualization
library(ggridges) # Density ridge plots 
library(fst) # For reading .fst files

# Load datasets 
denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

# Combine both datasets into one
ess_data <- bind_rows(denmark_data %>% mutate(Country = "Denmark"),
                      italy_data %>% mutate(Country = "Italy"))

# Remove invalid responses 
ess_clean <- ess_data %>%
  filter(!sofrdst %in% c("Refusal", "DN", "NA"))

response_distribution <- ess_data %>%
  group_by(Country, sofrdst) %>% summarise(count = n(), .groups = 'drop') %>%
  mutate(percentage = round(100 * count / sum(count), 1))

# Create binary education measure for Italy and Denmark
ess_data <- ess_data %>%
  mutate(
    education = case_when(
      # Secondary or less (ES-ISCED levels 0-4)
      eisced %in% 1:4 ~ "Up to Secondary",
      # Higher education (ES-ISCED levels 5-7)
      eisced %in% 5:7 ~ "Higher Education",
      # Mark missing values explicity 
      TRUE ~ NA_character_
    ),
    # Convert to factor for proper ordering in visulizations
    education = factor(education, levels = c("Up to secondary", "Higher Education"))
  )

#Verify the recording
education_check <- ess_data %>%
  group_by(education) %>%
  summarise(
    n = n(),
    pct = round(n / nrow(ess_data) * 100, 1), # % of total sample 
    valid_pct = round(n / sum(!is.na(ess_data$education)) * 100, 1) # % of valid cases
  )

# Print summary
print("Education categories after recording:")

## [1] "Education categories after recording:"

print(education_check)

## # A tibble: 2 × 4
##   education            n   pct valid_pct
##   <fct>            <int> <dbl>     <dbl>
## 1 Higher Education  6520  28.9      100 
## 2 <NA>             16066  71.1      246.

# Compute sample sizes by country and education level
sample_sizes <- ess_data %>%
  group_by(Country, education) %>%
  summarise(n = n(), .groups = 'drop') %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Display results
print("Sample sizes by country and education level:")

## [1] "Sample sizes by country and education level:"

print(sample_sizes)

## # A tibble: 4 × 4
##   Country education            n percentage
##   <chr>   <fct>            <int>      <dbl>
## 1 Denmark Higher Education  4931       21.8
## 2 Denmark <NA>              7477       33.1
## 3 Italy   Higher Education  1589        7  
## 4 Italy   <NA>              8589       38

# Compute tital sample size per country 
total_sample_sizes <- ess_data %>%
  group_by(Country) %>%
  summarise(total_n = n(), .groups = 'drop')

print("Total sample sizes per country:")

## [1] "Total sample sizes per country:"

print(total_sample_sizes)

## # A tibble: 2 × 2
##   Country total_n
##   <chr>     <int>
## 1 Denmark   12408
## 2 Italy     10178

Table Creation: Create a gt table showing: • Title: “Views on Fair Income Distribution” • Subtitle: “Response distribution by country (%)” • Format: o Full country names o Response categories from “Agree strongly” to “Disagree strongly” o All percentages to one decimal place o Bold headers including “Country” • Source note including sample sizes for both countries

# Load necessary libraries
library(tidyverse)
library(gt)

#Sample data (replace with actual ESS data)
data <- tibble(
  Country = c("Denmark", "Denmark", "Denmark", "Denamrk", "Denamrk",
              "Italy", "Italy", "Italy", "Italy", "Italy"),
  Response = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly",
               "Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
  Percentage = c(25.4, 40.3, 15.2, 10.5, 8.6,
                 18.2, 35.7, 20.1, 15.6, 10.4)
  )

# Create the gt table
gt_table <- data %>%
  pivot_wider(names_from = Response, values_from = Percentage) %>%
  gt() %>%
   tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    Country = md("**Country**"),
    'Agree strongly' = md("**Agree strongly**"),
    Agree = md("**Neither**"),
    Neither = md("**Niether**"),
    Disagree = md("**Disagree**"),
   'Disagree strongly' = md("**Disagree strongly**")
  ) %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_source_note(
    source_note = "Data: ESS (Sample sizes: Denmark = X, Italy = Y)'"
  )
# Print the table 
gt_table

Country	Agree strongly	Neither	Niether	Disagree	Disagree strongly
Views on Fair Income Distribution
Response distribution by country (%)
Denmark	25.4	40.3	15.2	NA	NA
Denamrk	NA	NA	NA	10.5	8.6
Italy	18.2	35.7	20.1	15.6	10.4
Data: ESS (Sample sizes: Denmark = X, Italy = Y)'

Visualizations:

Main Distribution Plot: • Create density ridges plot showing: o Title: “Distribution of Views on Income Equality” o Subtitle: “Comparison between Italy and Denmark” o Format: § geom_density_ridges(alpha = 0.7) § scale_fill_brewer(palette = “Set1”) § theme_minimal() without minor grid lines § No legend § Clear response category labels
Education Analysis Plot: • Create faceted density ridges plot showing: o Title: “Views on Income Distribution by Education Level” o Subtitle: “Comparing Italy and Denmark” o Format: § Facet by country § Same color scheme as main plot § Bold facet labels § Clear response category labels

# Load required libraries 
library(tidyverse)
library(ggridges)

#Sample data (Replace with actual ESS data)
data <- tibble(
  Country = rep(c("Denamrk", "Italy"), each = 100),
  Response = c(
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 100, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10)),
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 100, replace = TRUE, prob = c(0.18, 0.35, 0.20, 0.16, 0.11))
  )
)

# Load required libraries 
library(tidyverse)
library(ggridges)

#Sample data (Replace with actual ESS data
set.seed(123) # For reproducibility
data <- tibble(
  Country = rep(c("Denmark", "Italy"), each = 150),
  Response = sample(1:5, 300, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10))
)

# Define response category labels 
response_labels <- c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly")

# Create the density ridges plot
ggplot(
  data = data,
  mapping = aes(
    x = as.numeric(Response), # Convert Response to numeric for continuous scale
    y = Country,
    fill = Country
  )
) +
  # Create density ridges
  geom_density_ridges(
    alpha = 0.7, # Partial transparency
    scale = 0.9 # Slight overlap between ridges
  ) +
  #Maintain consistent color scheme
  scale_fill_brewer(palette = "Set1") +
  # Convert numeric values to response category labels
  scale_x_continuous(
    breaks = 1:5,
    labels = response_labels
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denamrk",
    x = "Response Category",
    y = NULL # Remove y-axis label as categories are self explanatory
  ) +
  theme_minimal() +
  theme(
    legend.position = "none", # Remove legend
    panel.grid.minor = element_blank() #Remove minor grid lines 
  )

## Picking joint bandwidth of 0.323

library(tidyverse)
library(ggridges)

# Sample data (Replace with actual ESS data)
set.seed(123) # For reproducibility
data <- tibble(
  Country = rep(c("Denamrk", "Italy"), each = 150),
  Education = rep(c("Low", "Medium", "High"), each = 50, times = 2),
  Response = c(
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10)),
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.18, 0.35, 0.20, 0.16, 0.11))
  )
)

# Create faceted density ridges plot
ggplot(data, aes(x = Response, y = Education, fill = Education)) +
  geom_density_ridges(alpha = 0.7, bandwidth = 0.8) + # Increased bandwidth for smoothness
  scale_fill_brewer(palette = "Set1") + # Same color scheme as main plot
  facet_wrap(~ Country) + # Facet by country
  theme_minimal() +
  theme(
    panal.grid.minor = element_blank(), # Remove minor grid lines
    strip.text = element_text(face = "bold", size = 12) # Bold facet labels
  ) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response Category",
    y = "Education Level"
  ) +
  theme(legend.position = "none") # Remove legend

## Warning in plot_theme(plot): The `panal.grid.minor` theme element is not
## defined in the element hierarchy.

Interpretation Write analysis covering:

Country differences in views
Educational patterns within countries
Overall takeaways

The overlapping density curves show that the two countries’ opinions or attitudes across these education groups vary. Denmark and Italy show similar distributions in terms of educational levels. The overlaps in the graph represent variance within education levels. Overall, the takeaway from the graph is that Denmark and Italy share similar responses.

```

Exercise 1

2025-02-04

Task 1: Global Life Expectancy Changes

Task 2: Interpersonal Trust Patterns (3 points)