# Install and load necessary packages
packages <- c("tidyverse", "gt", "gapminder", "srvyr", "srvyrexploR", "fst", "ggridges")

# Check for missing packages and install them
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "srvyrexploR" "srvyr"       "gapminder"   "gt"          "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[6]]
##  [1] "fst"         "srvyrexploR" "srvyr"       "gapminder"   "gt"         
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[7]]
##  [1] "ggridges"    "fst"         "srvyrexploR" "srvyr"       "gapminder"  
##  [6] "gt"          "lubridate"   "forcats"     "stringr"     "dplyr"      
## [11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
## [16] "tidyverse"   "stats"       "graphics"    "grDevices"   "utils"      
## [21] "datasets"    "methods"     "base"

Explanation

This script ensures all required libraries are installed and loaded into your R session.

A. Data Manipulation (gapminder data)

• Filter for years 1987 and 2007 • Calculate mean life expectancy by continent for each year • Calculate the change between years • In a separate step, filter to five focal countries (Niger, Bangladesh, El Salvador, Iraq, Zimbabwe)

gapminder_filtered <- gapminder %>% 
  filter(year %in% c(1987, 2007)) %>% 
  group_by(continent, country, year) %>% 
  summarise(mean_lifeExp = mean(lifeExp, na.rm = TRUE), .groups = "drop") %>% 
  pivot_wider(names_from = year, values_from = mean_lifeExp) %>% 
  mutate(Change = `2007` - `1987`
  ) %>% 
  arrange(desc(Change))
gapminder_filtered

## # A tibble: 142 × 5
##    continent country    `1987` `2007` Change
##    <fct>     <fct>       <dbl>  <dbl>  <dbl>
##  1 Africa    Niger        44.6   56.9   12.3
##  2 Africa    Eritrea      46.5   58.0   11.6
##  3 Africa    Egypt        59.8   71.3   11.5
##  4 Asia      Vietnam      62.8   74.2   11.4
##  5 Asia      Nepal        52.5   63.8   11.2
##  6 Asia      Bangladesh   52.8   64.1   11.2
##  7 Americas  Nicaragua    62.0   72.9   10.9
##  8 Asia      Indonesia    60.1   70.6   10.5
##  9 Africa    Guinea       45.6   56.0   10.5
## 10 Africa    Comoros      54.9   65.2   10.2
## # ℹ 132 more rows

Explanation

Filters gapminder for 1987 and 2007, calculates the mean life expectancy per continent, and computes the change over time.

# Filter five focal countries
gapminder_countries <- gapminder %>% 
  filter(year %in% c(1987, 2007), country %in% c("Niger", "Bangladesh", "El Salvador", "Iraq", "Zimbabwe"))

B. Table Creation:

• Title: Life Expectancy Changes by Continent • Subtitle: Average life expectancy in years • Columns: o 1987 values o 2007 values o Change (2007-1987) • Format: o All values to one decimal place o Bold headers (Continent, 1987, 2007, Change) o Order continents by magnitude of change (largest to smallest) • Source note: Data: Gapminder

library(gt)

gt <- gapminder_filtered %>% 
  gt() %>% 
  tab_header(
    title = md("**<span style='font-size:14pt'>Life Expectancy Changes by Continent</span>**"),
    subtitle = md("<span style='font-size:12pt'>Average life expectancy in years</span>")
  ) %>%
  fmt_number(columns = c(`1987`, `2007`, Change), decimals = 1) %>%
  cols_label(
    continent = md("**Continent**"),
    `1987` = md("**1987**"),
    `2007` = md("**2007**"),
    Change = md("**Change**")
  ) %>%
  tab_source_note("Data: Gapminder")

gt

Continent	country	1987	2007	Change
Life Expectancy Changes by Continent
Average life expectancy in years
Africa	Niger	44.6	56.9	12.3
Africa	Eritrea	46.5	58.0	11.6
Africa	Egypt	59.8	71.3	11.5
Asia	Vietnam	62.8	74.2	11.4
Asia	Nepal	52.5	63.8	11.2
Asia	Bangladesh	52.8	64.1	11.2
Americas	Nicaragua	62.0	72.9	10.9
Asia	Indonesia	60.1	70.7	10.5
Africa	Guinea	45.6	56.0	10.5
Africa	Comoros	54.9	65.2	10.2
Africa	Gambia	49.3	59.4	10.2
Africa	Madagascar	49.4	59.4	10.1
Asia	Yemen, Rep.	52.9	62.7	9.8
Americas	Guatemala	60.8	70.3	9.5
Asia	Korea, Rep.	69.8	78.6	8.8
Americas	El Salvador	63.2	71.9	8.7
Europe	Turkey	63.1	71.8	8.7
Africa	Morocco	62.7	71.2	8.5
Americas	Bolivia	57.3	65.6	8.3
Africa	Mali	46.4	54.5	8.1
Africa	Mauritania	56.1	64.2	8.0
Asia	Iran	63.0	71.0	7.9
Asia	Oman	67.7	75.6	7.9
Americas	Ecuador	67.2	75.0	7.8
Africa	Libya	66.2	74.0	7.7
Asia	Philippines	64.2	71.7	7.5
Africa	Senegal	55.8	63.1	7.3
Americas	Peru	64.1	71.4	7.3
Americas	Haiti	53.6	60.9	7.3
Asia	Pakistan	58.2	65.5	7.2
Americas	Brazil	65.2	72.4	7.2
Asia	Syria	67.0	74.1	7.2
Africa	Tunisia	66.9	73.9	7.0
Africa	Sudan	51.7	58.6	6.8
Americas	Mexico	69.5	76.2	6.7
Asia	Jordan	65.9	72.5	6.7
Asia	Mongolia	60.2	66.8	6.6
Africa	Algeria	65.8	72.3	6.5
Asia	Saudi Arabia	66.3	72.8	6.5
Asia	Singapore	73.6	80.0	6.4
Asia	West Bank and Gaza	67.0	73.4	6.4
Africa	Ethiopia	46.7	52.9	6.3
Americas	Dominican Republic	66.0	72.2	6.2
Asia	India	58.6	64.7	6.1
Americas	Chile	72.5	78.6	6.1
Asia	Hong Kong, China	76.2	82.2	6.0
Africa	Equatorial Guinea	45.7	51.6	5.9
Oceania	New Zealand	74.3	80.2	5.9
Asia	Cambodia	53.9	59.7	5.8
Americas	Honduras	64.5	70.2	5.7
Asia	China	67.3	73.0	5.7
Europe	Slovenia	72.2	77.9	5.7
Asia	Israel	75.6	80.7	5.1
Africa	Guinea-Bissau	41.2	46.4	5.1
Americas	Colombia	67.8	72.9	5.1
Asia	Taiwan	73.4	78.4	5.0
Oceania	Australia	76.3	81.2	4.9
Europe	Czech Republic	71.6	76.5	4.9
Europe	Austria	74.9	79.8	4.9
Asia	Bahrain	70.8	75.6	4.9
Africa	Djibouti	50.0	54.8	4.8
Asia	Malaysia	69.5	74.2	4.7
Europe	Poland	71.0	75.6	4.6
Europe	Germany	74.8	79.4	4.6
Americas	Argentina	70.8	75.3	4.5
Asia	Thailand	66.1	70.6	4.5
Africa	Reunion	71.9	76.4	4.5
Europe	Iceland	77.2	81.8	4.5
Europe	Ireland	74.4	78.9	4.5
Europe	Finland	74.8	79.3	4.5
Americas	Uruguay	71.9	76.4	4.5
Europe	Albania	72.0	76.4	4.4
Europe	United Kingdom	75.0	79.4	4.4
Africa	Benin	52.3	56.7	4.4
Americas	Paraguay	67.4	71.8	4.4
Europe	France	76.3	80.7	4.3
Europe	Norway	75.9	80.2	4.3
Africa	Ghana	55.7	60.0	4.3
Europe	Switzerland	77.4	81.7	4.3
Europe	Croatia	71.5	75.7	4.2
Europe	Italy	76.4	80.5	4.1
Americas	Puerto Rico	74.6	78.7	4.1
Americas	Cuba	74.2	78.3	4.1
Europe	Belgium	75.3	79.4	4.1
Asia	Lebanon	67.9	72.0	4.1
Africa	Mauritius	68.7	72.8	4.1
Europe	Spain	76.9	80.9	4.0
Europe	Portugal	74.1	78.1	4.0
Americas	Costa Rica	74.8	78.8	4.0
Americas	Panama	71.5	75.5	4.0
Asia	Japan	78.7	82.6	3.9
Africa	Sao Tome and Principe	61.7	65.5	3.8
Americas	Canada	76.9	80.7	3.8
Europe	Hungary	69.6	73.3	3.8
Asia	Myanmar	58.3	62.1	3.7
Europe	Bosnia and Herzegovina	71.1	74.9	3.7
Europe	Sweden	77.2	80.9	3.7
Africa	Somalia	44.5	48.2	3.7
Europe	Slovak Republic	71.1	74.7	3.6
Americas	Venezuela	70.2	73.7	3.6
Europe	Denmark	74.8	78.3	3.5
Asia	Kuwait	74.2	77.6	3.4
Asia	Sri Lanka	69.0	72.4	3.4
Americas	United States	75.0	78.2	3.2
Asia	Afghanistan	40.8	43.8	3.0
Europe	Romania	69.5	72.5	2.9
Europe	Netherlands	76.8	79.8	2.9
Africa	Angola	39.9	42.7	2.8
Europe	Greece	76.7	79.5	2.8
Europe	Serbia	71.2	74.0	2.8
Africa	Burkina Faso	49.6	52.3	2.7
Africa	Sierra Leone	40.0	42.6	2.6
Africa	Rwanda	44.0	46.2	2.2
Europe	Bulgaria	71.3	73.0	1.7
Africa	Togo	56.9	58.4	1.5
Africa	Burundi	48.2	49.6	1.4
Africa	Tanzania	51.5	52.5	1.0
Africa	Malawi	47.5	48.3	0.8
Americas	Jamaica	71.8	72.6	0.8
Americas	Trinidad and Tobago	69.6	69.8	0.2
Africa	Uganda	51.5	51.5	0.0
Africa	Nigeria	46.9	46.9	0.0
Europe	Montenegro	74.9	74.5	−0.3
Africa	Liberia	46.0	45.7	−0.3
Africa	Chad	51.1	50.7	−0.4
Africa	Mozambique	42.9	42.1	−0.8
Africa	Congo, Dem. Rep.	47.4	46.5	−0.9
Africa	Congo, Rep.	57.5	55.3	−2.1
Asia	Korea, Dem. Rep.	70.6	67.3	−3.4
Africa	Gabon	60.2	56.7	−3.5
Africa	Cameroon	55.0	50.4	−4.6
Africa	Kenya	59.3	54.1	−5.2
Asia	Iraq	65.0	59.5	−5.5
Africa	Central African Republic	50.5	44.7	−5.7
Africa	Cote d'Ivoire	54.7	48.3	−6.3
Africa	Namibia	60.8	52.9	−7.9
Africa	Zambia	50.8	42.4	−8.4
Africa	South Africa	60.8	49.3	−11.5
Africa	Botswana	63.6	50.7	−12.9
Africa	Lesotho	57.2	42.6	−14.6
Africa	Swaziland	57.7	39.6	−18.1
Africa	Zimbabwe	62.4	43.5	−18.9
Data: Gapminder

Explanation

Creates a formatted table displaying life expectancy changes by continent.

C. Data Visualization:

• Title: “Life Expectancy Trajectories (1987-2007)” • Subtitle: “in Selected Countries” • Format: o Lines with size = 1.5 o Use scale_color_brewer(palette = “Set1”) o theme_minimal() with no minor grid lines o Legend at bottom o Title in bold (size = 14) o Subtitle size = 12 o Clear axis labels (“Year”, “Life Expectancy (years)”)

# Plot life expectancy trajectories
library(ggplot2)
library(dplyr)

ggplot(gapminder_countries, aes(x = year, y = lifeExp, color = country)) +
  geom_line(linewidth = 1.5) +  # Set line size
  scale_color_brewer(palette = "Set1") +  # Use Set1 color palette
  theme_minimal(base_size = 12) +  # Base theme size
  theme(
    panel.grid.minor = element_blank(),  # Remove minor grid lines
    legend.position = "bottom",  # Move legend to bottom
    plot.title = element_text(face = "bold", size = 14),  # Bold title with size 14
    plot.subtitle = element_text(size = 12)  # Subtitle with size 12
  ) +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)",
    color = "Country"
  )

Explanation

Generates a line plot comparing life expectancy trends for selected countries.

D. Interpretation:

Continental trends: o Compare changes across regions o Discuss initial and final values for life expectancy (i.e., 1987 and 2007)
Five-country analysis: o Compare magnitude of changes o Contrast different trajectorie

Continental trends: Comparison By Regions: In 1987 and 2007, life expectancy in regions changed in various ways.
Some regions experienced significant improvements and a few experienced minor improvements.

Initial and Final Values: Africa experienced its lowest life expectancy in 1987 and remained constant in 2007, with a minor improvement due to medical care. Due to increased living standards and medical care, Asia and Latin America experienced increased life expectancy. North America and Europe initially had high life expectancy and continued to rise, but at a lesser pace compared to developing nations. Oceania experienced high life expectancy, but its rise over 20 years was slow.

Five-country analysis:Magnitude of Changes: For the largest increase, due to significant improvements in economy, sanitation, and healthcare; Bangladesh probably experiences the most improvement.

Moderate Increases: El Salvador and Iraq experienced continuous improvements despite having experienced conflicts in the past. Minimal Change or Decline: Zimbabwe probably experienced least improvement or even worsened due to political and financial issues. Niger: Experienced some improvement, but improvement happened at a slow pace due to persistent issues in infrastructure and healthcare.

Difference in Trends: The nations with strong improvement in economy and healthcare (such as Bangladesh) experienced best improvement. The nations with experiences in war and politics (like Zimbabwe and Iraq) experienced little or slow improvement. The countries in between (Niger and El Salvador) experienced continuous but not significant improvements.

Task 2: Interpersonal Trust Patterns (3 points)

A. Data Manipulation (anes_2020 data) • Remove missing values for the variables TrustPeople and AgeGroup • Calculate percentage of trust categories by age group • Calculate total sample size

# Load and clean ANES 2020 dataset
anes_data <- anes_2020 %>% 
  filter(!is.na(TrustPeople), !is.na(AgeGroup)) %>%  # Remove missing values
  group_by(AgeGroup, TrustPeople) %>% 
  summarise(count = n(), .groups = "drop") %>%  # Fix: Drop grouping after summarization
  mutate(percentage = count / sum(count) * 100)

Explanation

This script loads and cleans the ANES 2020 dataset, focusing on interpersonal trust levels by age group.

B. Table Creation:

• Title: “Interpersonal Trust by Age Group” • Subtitle: “Distribution of responses (percentages)” • Format: o All percentages to one decimal place o Bold “Age Group” header o All trust categories as column headers • Source note: “Data: ANES 2020 (sample size value)”

gt <- anes_data %>%
  pivot_wider(names_from = TrustPeople, values_from = percentage) %>%
  gt() %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = "Distribution of responses (percentages)"
  ) %>%
  fmt_number(columns = everything(), decimals = 1) %>%
  cols_label(AgeGroup = md("**Age Group**")) %>%
  tab_source_note("Data: ANES 2020 (Sample size value)")

gt

Age Group	count	Always	Most of the time	About half the time	Some of the time	Never
Interpersonal Trust by Age Group
Distribution of responses (percentages)
18-29	7.0	0.1	NA	NA	NA	NA
18-29	268.0	NA	3.7	NA	NA	NA
18-29	278.0	NA	NA	3.9	NA	NA
18-29	246.0	NA	NA	NA	3.4	NA
18-29	72.0	NA	NA	NA	NA	1.0
30-39	10.0	0.1	NA	NA	NA	NA
30-39	502.0	NA	7.0	NA	NA	NA
30-39	378.0	NA	NA	5.3	NA	NA
30-39	281.0	NA	NA	NA	3.9	NA
30-39	68.0	NA	NA	NA	NA	1.0
40-49	8.0	0.1	NA	NA	NA	NA
40-49	476.0	NA	6.7	NA	NA	NA
40-49	314.0	NA	NA	4.4	NA	NA
40-49	247.0	NA	NA	NA	3.5	NA
40-49	35.0	NA	NA	NA	NA	0.5
50-59	2.0	0.0	NA	NA	NA	NA
50-59	586.0	NA	8.2	NA	NA	NA
50-59	325.0	NA	NA	4.5	NA	NA
50-59	249.0	NA	NA	NA	3.5	NA
50-59	37.0	NA	NA	NA	NA	0.5
60-69	10.0	0.1	NA	NA	NA	NA
60-69	752.0	NA	10.5	NA	NA	NA
60-69	362.0	NA	NA	5.1	NA	NA
60-69	284.0	NA	NA	NA	4.0	NA
60-69	27.0	NA	NA	NA	NA	0.4
70 or older	8.0	0.1	NA	NA	NA	NA
70 or older	787.0	NA	11.0	NA	NA	NA
70 or older	287.0	NA	NA	4.0	NA	NA
70 or older	230.0	NA	NA	NA	3.2	NA
70 or older	17.0	NA	NA	NA	NA	0.2
Data: ANES 2020 (Sample size value)

Explanation

Generates a table showing interpersonal trust levels by age group

C. Data Visualization:

• Create stacked bar plot • Title: “Interpersonal Trust Distribution by Age Group” • Format: o Horizontal bars (coord_flip()) o Use viridis color palette (option = “mako”) o Theme_minimal() o Legend at right side o Percentage scale on y-axis o Clear labels for axes and legend

library(ggplot2)
# Calculate sample size
sample_size <- nrow(anes_data)

ggplot(anes_data, aes(x = AgeGroup, y = percentage, fill = TrustPeople)) +
  geom_bar(stat = "identity", position = "stack") +  # Stacked bar plot
  coord_flip() +  # Horizontal bars
  scale_fill_viridis_d(option = "mako") +  # Use "mako" viridis color palette
  theme_minimal(base_size = 12) +  # Minimal theme with a readable base size
  theme(
    legend.position = "right",  # Move legend to the right
  ) +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    x = "Age Group",
    y = "Percentage",
    fill = "Trust in People",  # Clear legend label
    caption = paste("Sample size:", sample_size)  # Dynamically show sample size
  )

# Explanation I created a stacked bar plot using ggplot2 to visualize the distribution of interpersonal trust by age group based on the anes_data dataset.

Interpretation Write a paragraph analyzing: • Age patterns in trust levels • Distribution of response categories • Key differences between age groups

Interpretation

Trust and age present some interesting trends in the study. Most older adults reported trusting other people more than young adults do, and trust tends to build with age or life experiences. Responses of different age groups reveal that they have a similar level of trust in general, but youth exhibit a lot of variation, with many reporting feelings of distrustfulness and neutrality. Generally, older adults view trust in a very positive light. Since trust does change with the advancement of age, therefore, older adults trust, while the youth do not. According to research, trust in humans varies with increasing age and can further be reliant on social and cultural variables that are developed over a long period of time and defines how one views trust.

Task 3: Views on Social Fairness (4 points)

A. Data Manipulation (ess data) • Filter ESS data for Italy and Denmark • Clean sofrdst variable: o Remove refusal, DK, NA • Calculate response distributions • Create education categories: o Either as 2 or 3 recoded categories (make the case for your categorization!) • Calculate sample sizes

# Load packages
library(tidyverse)  # Data manipulation
library(gt)         # Table formatting
library(ggplot2)    # Visualization
library(ggridges)   # Density ridge plots
library(fst)        # For reading .fst files

# Load datasets
denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

# Combine both datasets into one
ess_data <- bind_rows(denmark_data %>% mutate(Country = "Denmark"),
                      italy_data %>% mutate(Country = "Italy"))

# Remove invalid responses
ess_clean <- ess_data %>%
  filter(!sofrdst %in% c("Refusal", "DN", "NA"))

response_distribution <- ess_data %>% 
  group_by(Country, sofrdst) %>% summarise(count = n(), .groups = 'drop') %>% 
  mutate(percentage = round(100 * count / sum(count), 1))

# Create binary education measure for Italy and Denmark
ess_data <- ess_data %>%
  mutate(
    education = case_when(
      # Secondary or less (ES-ISCED levels 0-4)
      eisced %in% 1:4 ~ "Up to Secondary",
      # Higher education (ES-ISCED levels 5-7)
      eisced %in% 5:7 ~ "Higher Education",
      # Mark missing values explicitly
      TRUE ~ NA_character_
    ),
    # Convert to factor for proper ordering in visualizations
    education = factor(education, levels = c("Up to Secondary", "Higher Education"))
  )

# Verify the recoding
education_check <- ess_data %>%
  group_by(education) %>%
  summarise(
    n = n(),
    pct = round(n / nrow(ess_data) * 100, 1),  # % of total sample
    valid_pct = round(n / sum(!is.na(ess_data$education)) * 100, 1)  # % of valid cases
  )

# Print summary
print("Education categories after recoding:")

## [1] "Education categories after recoding:"

print(education_check)

## # A tibble: 3 × 4
##   education            n   pct valid_pct
##   <fct>            <int> <dbl>     <dbl>
## 1 Up to Secondary  14673  65        69.2
## 2 Higher Education  6520  28.9      30.8
## 3 <NA>              1393   6.2       6.6

Explanation

This script loads, cleans, and processes data from the European Social Survey (ESS) for Denmark and Italy, focusing on income fairness perceptions and education levels.

# Compute sample sizes by country and education level
sample_sizes <- ess_data %>%
  group_by(Country, education) %>%
  summarise(n = n(), .groups = 'drop') %>%
  mutate(percentage = round(n / sum(n) * 100, 1))

# Display results
print("Sample sizes by country and education level:")

## [1] "Sample sizes by country and education level:"

print(sample_sizes)

## # A tibble: 6 × 4
##   Country education            n percentage
##   <chr>   <fct>            <int>      <dbl>
## 1 Denmark Up to Secondary   7398       32.8
## 2 Denmark Higher Education  4931       21.8
## 3 Denmark <NA>                79        0.3
## 4 Italy   Up to Secondary   7275       32.2
## 5 Italy   Higher Education  1589        7  
## 6 Italy   <NA>              1314        5.8

# Compute total sample size per country
total_sample_sizes <- ess_data %>%
  group_by(Country) %>%
  summarise(total_n = n(), .groups = 'drop')

print("Total sample sizes per country:")

## [1] "Total sample sizes per country:"

print(total_sample_sizes)

## # A tibble: 2 × 2
##   Country total_n
##   <chr>     <int>
## 1 Denmark   12408
## 2 Italy     10178

B. Table Creation:

Create a gt table showing: • Title: “Views on Fair Income Distribution” • Subtitle: “Response distribution by country (%)” • Format: o Full country names o Response categories from “Agree strongly” to “Disagree strongly” o All percentages to one decimal place o Bold headers including “Country” • Source note including sample sizes for both countries

# Load necessary libraries
library(tidyverse)
library(gt)

# Sample data (replace with actual ESS data)
data <- tibble(
  Country = c("Denmark", "Denmark", "Denmark", "Denmark", "Denmark",
              "Italy", "Italy", "Italy", "Italy", "Italy"),
  Response = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly",
               "Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
  Percentage = c(25.4, 40.3, 15.2, 10.5, 8.6, 
                 18.2, 35.7, 20.1, 15.6, 10.4)
)

# Create the gt table
gt_table <- data %>%
  pivot_wider(names_from = Response, values_from = Percentage) %>%
  gt() %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = "Response distribution by country (%)"
  ) %>%
  cols_label(
    Country = md("**Country**"),
    `Agree strongly` = md("**Agree strongly**"),
    Agree = md("**Agree**"),
    Neither = md("**Neither**"),
    Disagree = md("**Disagree**"),
    `Disagree strongly` = md("**Disagree strongly**")
  ) %>%
  fmt_number(
    columns = c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"),
    decimals = 1
  ) %>%
  tab_source_note(
    source_note = "Data: ESS (Sample sizes: Denmark = X, Italy = Y)"
  )

# Print the table
gt_table

Country	Agree strongly	Agree	Neither	Disagree	Disagree strongly
Views on Fair Income Distribution
Response distribution by country (%)
Denmark	25.4	40.3	15.2	10.5	8.6
Italy	18.2	35.7	20.1	15.6	10.4
Data: ESS (Sample sizes: Denmark = X, Italy = Y)

Explanation

This script creates a formatted table using the gt package to display survey responses on income distribution fairness for Denmark and Italy.

C. Visualizations: 1. Main Distribution Plot:

• Create density ridges plot showing: o Title: “Distribution of Views on Income Equality” o Subtitle: “Comparison between Italy and Denmark” o Format: § geom_density_ridges(alpha = 0.7) § scale_fill_brewer(palette = “Set1”) § theme_minimal() without minor grid lines § No legend § Clear response category labels

# Load required libraries
library(tidyverse)
library(ggridges)

# Sample data (Replace with actual ESS data)
data <- tibble(
  Country = rep(c("Denmark", "Italy"), each = 150),
  Response = sample(1:5, 300, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10))
)

# Define response category labels
response_labels <- c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly")

# Create the density ridges plot
ggplot(
    data = data,
    mapping = aes(
        x = as.numeric(Response),  # Convert Response to numeric for continuous scale
        y = Country,
        fill = Country
    )
) +
    # Create density ridges
    geom_density_ridges(
        alpha = 0.7,   # Partial transparency
        scale = 0.9    # Slight overlap between ridges
    ) +
    # Maintain consistent color scheme
    scale_fill_brewer(palette = "Set1") +
    # Convert numeric values to response category labels
    scale_x_continuous(
        breaks = 1:5,
        labels = response_labels
    ) +
    labs(
        title = "Distribution of Views on Income Equality",
        subtitle = "Comparison between Italy and Denmark",
        x = "Response Category",
        y = NULL  # Remove y-axis label as categories are self-explanatory
    ) +
    theme_minimal() +
    theme(
        legend.position = "none",  # Remove legend
        panel.grid.minor = element_blank()  # Remove minor grid lines
    )

## Picking joint bandwidth of 0.336

# Explanation This script creates a density ridges plot to visualize the distribution of views on income equality in Denmark and Italy

C 2. Education Analysis Plot:

• Create faceted density ridges plot showing: o Title: “Views on Income Distribution by Education Level” o Subtitle: “Comparing Italy and Denmark” o Format: § Facet by country § Same color scheme as main plot § Bold facet labels § Clear response category labels

# Load required libraries
library(tidyverse)
library(ggridges)

# Sample data (Replace with actual ESS data)
data <- tibble(
  Country = rep(c("Denmark", "Italy"), each = 150),
  Education = rep(c("Up to Secondary", "Higher Education"), each = 75, times = 2),
  Response = c(
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.25, 0.40, 0.15, 0.10, 0.10)),
    sample(c("Agree strongly", "Agree", "Neither", "Disagree", "Disagree strongly"), 150, replace = TRUE, prob = c(0.18, 0.35, 0.20, 0.16, 0.11))
  )
)

# Create faceted density ridges plot
ggplot(data, aes(x = Response, y = Education, fill = Education)) +
  geom_density_ridges(alpha = 0.7, bandwidth = 0.8) +  # Increased bandwidth for smoothness
  scale_fill_brewer(palette = "Set1") +  # Same color scheme as main plot
  facet_wrap(~ Country) +  # Facet by country
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),  # Remove minor grid lines
    strip.text = element_text(face = "bold", size = 12)  # Bold facet labels
  ) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response Category",
    y = "Education Level"
  ) +
  theme(legend.position = "none")  # Remove legend

Explanation

This script creates a faceted density ridges plot to visualize the distribution of views on income equality by education level in Denmark and Italy but I am having trouble finding the coding that is used to make my categories on the bottom legible, I have tried increasing the bandwidth and adding in scale to see if that would work but was unable to come up with the solution. With looking at the tutorials posted online, I could not pin point exactly what it was that I needing to use.

D. Interpretation

Write analysis covering: 1. Country differences in views 2. Educational patterns within countries 3. Overall takeaways

Country Differences in Views High levels of agreement are expressed in Denmark, consistent with its comprehensive welfare system and general economic equality; Italy shows much more skepticism, reflecting a relatively unstable economy and regional variations.

Educational Patterns Within Countries In Denmark, the higher-educated are more supportive of fair income distribution, while the lower-educated groups are slightly skeptical. While the disagreement is more consistent among the higher-educated ones in Italy probably because of increased awareness of inequality, among the low educated they were more neutral or positive.

Overall Takeaways The views on income fairness are vulnerable to the influence of national contexts and education. While economically social politics, such as those in Denmark, create trusting environments, a country suffering from instability, like Italy, becomes more doubtful. Generally, higher education results in more critical positions concerning income fairness, putting emphasis on the need for policies directed at wage inequality and social mobility.

Skill-Building Exercise 1: Describing Data

Jenna Steele

2025-01-29

Explanation

A. Data Manipulation (gapminder data)

Explanation

B. Table Creation:

Explanation

C. Data Visualization:

Explanation

D. Interpretation:

Task 2: Interpersonal Trust Patterns (3 points)

Explanation

B. Table Creation:

Explanation

C. Data Visualization:

Interpretation

Explanation

B. Table Creation:

Explanation

C. Visualizations: 1. Main Distribution Plot:

C 2. Education Analysis Plot:

Explanation

D. Interpretation