# Required packages
packages <- c(
"tidyverse",
# For data manipulation and ggplot2
"gt",
# For formatted tables
"gapminder",
# For gapminder dataset
"srvyr",
# For survey data
"fst",
# For reading ESS data
"ggridges"
# For density ridge plots
)# Install and load packages
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Adjuntando el paquete: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "fst"       "srvyr"     "gapminder" "gt"        "lubridate" "forcats"  
##  [7] "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [13] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [19] "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "ggridges"  "fst"       "srvyr"     "gapminder" "gt"        "lubridate"
##  [7] "forcats"   "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"    
## [13] "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices"
## [19] "utils"     "datasets"  "methods"   "base"

file.exists("anes_2020.rda")

## [1] TRUE

load("anes_2020.rda")

head(gapminder)

## # A tibble: 6 × 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.

Notes: Name in the columes: country, continent, year, lifeExp, pop, gdpPercap Some notes may be written in Spanish for the author’s convenience.

Task 1: Global Life Expectancy Changes

life_exp_cont <- gapminder %>%
  filter(year %in% c(1987, 2007))%>%
  group_by(continent) %>%
  summarise(
    life_1987 = first(lifeExp),
    life_2007 = last(lifeExp),
    change = life_1987 - life_2007,
    avg_life = mean(lifeExp),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_life))

life_exp_cont

## # A tibble: 5 × 5
##   continent life_1987 life_2007 change avg_life
##   <fct>         <dbl>     <dbl>  <dbl>    <dbl>
## 1 Oceania        76.3      80.2  -3.88     78.0
## 2 Europe         72        79.4  -7.42     75.6
## 3 Americas       70.8      73.7  -2.97     70.8
## 4 Asia           40.8      62.7 -21.9      67.8
## 5 Africa         65.8      43.5  22.3      54.1

Explanation of the code:

Only the years 1987 and 2007 are filtered. The data is grouped by continent. The average life expectancy for each year is calculated. The change in life expectancy (2007 - 1987) is calculated. Finally, the continents are ordered according to the magnitude of the change.

life_exp_country <- gapminder %>%
  filter(year >= 1985 & year <= 2007) %>%
  group_by(country,year) %>%
  summarise(
    avg_life = mean(lifeExp),
    .groups = "drop"
  ) %>%
  arrange(avg_life)


focal_countries <-life_exp_country %>%
  filter(country %in% c("Niger", "Bangladesh", "El Salvador", "Iraq", "Zimbabwe"))

print (life_exp_country)

## # A tibble: 710 × 3
##    country       year avg_life
##    <fct>        <int>    <dbl>
##  1 Rwanda        1992     23.6
##  2 Rwanda        1997     36.1
##  3 Sierra Leone  1992     38.3
##  4 Zambia        2002     39.2
##  5 Swaziland     2007     39.6
##  6 Somalia       1992     39.7
##  7 Sierra Leone  1997     39.9
##  8 Angola        1987     39.9
##  9 Zimbabwe      2002     40.0
## 10 Sierra Leone  1987     40.0
## # ℹ 700 more rows

focal_countries

## # A tibble: 25 × 3
##    country     year avg_life
##    <fct>      <int>    <dbl>
##  1 Zimbabwe    2002     40.0
##  2 Zimbabwe    2007     43.5
##  3 Niger       1987     44.6
##  4 Zimbabwe    1997     46.8
##  5 Niger       1992     47.4
##  6 Niger       1997     51.3
##  7 Bangladesh  1987     52.8
##  8 Niger       2002     54.5
##  9 Bangladesh  1992     56.0
## 10 Niger       2007     56.9
## # ℹ 15 more rows

library(gt)

enhanced_table <- life_exp_cont %>%
  select(-avg_life) %>%
  gt() %>%
  cols_label(
    continent = "Continent",
    life_1987 = "1987",
    life_2007 = "2007",
    change = "Change (2007-1987)"
  ) %>%  
  fmt_number(
    columns = c('life_2007','life_1987', 'change'),
    decimals = 1,
    use_seps = TRUE
    ) %>%
  tab_header(
    title = md("**Life Expectancy Changes by Continent**"),
    subtitle = md("**Average life expectancy in years**")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(
    source_note = "Data: gapminder"
  )

enhanced_table

Continent	1987	2007	Change (2007-1987)
Life Expectancy Changes by Continent
Average life expectancy in years
Oceania	76.3	80.2	−3.9
Europe	72.0	79.4	−7.4
Americas	70.8	73.7	−3.0
Asia	40.8	62.7	−21.9
Africa	65.8	43.5	22.3
Data: gapminder

gt() is used to create a nicely formatted table. Not only that, but the values are also formatted to a decimal. Afterwards, it is only a matter of fixing the text layout, where title, subtitle and source notes are added and finally, bold is applied to the column headers.

library(ggplot2)

ggplot(focal_countries, aes(x = year, y = avg_life, group = country, color = country)) +
  geom_line(size = 1.5) +    
  geom_point(size = 3) +     
  scale_color_brewer(palette = "Set1") +  
  theme_minimal() +          
  theme(
    legend.position = "bottom",
    legend.title = element_blank(), 
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    panel.grid.minor = element_blank()  # Remove minor grid lines
  ) +
  labs(
    title = "Life Expectancy Trajectories (1987-2007)",
    subtitle = "in Selected Countries",
    x = "Year",
    y = "Life Expectancy (years)"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Functions like ggplot2 are used to visualize the evolution of life expectancy.

While scale_color_brewer(palette = “Set1”) can be used for a light color palette. This ends up making theme_minimal() finish the job and gives us a cleaner design, where minor grid lines are removed for clarity and labels and titles are added with the appropriate format.

Interpreting changes in life expectancy (1987-2007)

The data show something that, in retrospect, is predictable: life expectancy has increased in all regions, but not uniformly. Each continent has its own pace of development, marked by social, economic and political factors that directly influence the quality and length of life of its inhabitants.

Continental trends

If we focus on the continents, we find a clear pattern. Africa and Asia show the most drastic changes in these twenty years. This is no coincidence: investment in public health, the decrease in infectious diseases and improved access to medicines have been decisive in this growth. But, although the change is significant, Africa still lags behind other continents, which reminds us that an increase does not necessarily mean equity.

Europe and North America, on the other hand, show a much smaller variation. This is logical: they were already starting from high values. In 1987, so the improvement is more gradual. Latin America and Oceania fall somewhere in between: notable progress, but without the extreme transformations of Africa and Asia. If we look at the evolution of the data, the gap between continents has narrowed, but there are still marked differences.

Analysis of the five countries

When we move from the continental scale to specific cases, the picture becomes more diverse. Bangladesh is the country that has made the most progress in terms of life expectancy. It is not difficult to understand why: in these years, the country has drastically improved its public health conditions, reducing infant mortality rates and preventable diseases.

El Salvador follows a similar trajectory, with constant improvement, although to a lesser extent. Iraq, on the other hand, has less stable growth. And here a factor comes into play that numbers alone do not fully explain: the political context. War and internal conflicts have affected its development in these decades, limiting what otherwise might have been more evident progress.

But the most extreme case is Zimbabwe. Not only did it have the least improvement, but at times its life expectancy decreased. It is no coincidence: the country has faced political, economic and health crises. It is a reminder that development is not linear and that structural problems can slow down or even reverse growth in quality of life.

Task 2: Interpersonal Trust Patterns

anes_clean <- anes_2020 %>%
  select(TrustPeople, AgeGroup) %>%
  drop_na()

summary(anes_clean)

##               TrustPeople          AgeGroup   
##  Always             :  45   18-29      : 871  
##  Most of the time   :3371   30-39      :1239  
##  About half the time:1944   40-49      :1080  
##  Some of the time   :1537   50-59      :1199  
##  Never              : 256   60-69      :1435  
##                             70 or older:1329

trust_distribution <- anes_clean %>%
  group_by(AgeGroup, TrustPeople) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = (count / sum(count)) * 100)

trust_distribution

## # A tibble: 30 × 4
##    AgeGroup TrustPeople         count percentage
##    <fct>    <fct>               <int>      <dbl>
##  1 18-29    Always                  7     0.0979
##  2 18-29    Most of the time      268     3.75  
##  3 18-29    About half the time   278     3.89  
##  4 18-29    Some of the time      246     3.44  
##  5 18-29    Never                  72     1.01  
##  6 30-39    Always                 10     0.140 
##  7 30-39    Most of the time      502     7.02  
##  8 30-39    About half the time   378     5.28  
##  9 30-39    Some of the time      281     3.93  
## 10 30-39    Never                  68     0.951 
## # ℹ 20 more rows

To analyze the distribution of interpersonal trust across age groups, the data must first be grouped using the variables AgeGroup and TrustPeople. This allows us to count how many responses are in each combination of age group and trust level. This is essential, as it helps us understand how trust perceptions vary within each age cohort.

After counting the responses, the percentage of each category within its respective age group is calculated. This is achieved by dividing the number of responses in each trust level by the total responses in that group and then multiplying by 100 to obtain the percentage.

Finally, the results are printed to the console to verify that the calculations are correct.

library(gt)

trust_table <- trust_distribution %>%
  pivot_wider(names_from = TrustPeople, values_from = percentage) %>%
  gt() %>%
  cols_label(
    AgeGroup = "Age Group"
  ) %>%
  fmt_number(
    columns = -AgeGroup,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Interpersonal Trust by Age Group**"),
    subtitle = md("**Distribution of responses (percentages)**")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(
    source_note = md("**Data: ANES 2020**")
  )

trust_table

Age Group	count	Always	Most of the time	About half the time	Some of the time	Never
Interpersonal Trust by Age Group
Distribution of responses (percentages)
18-29	7.0	0.1	NA	NA	NA	NA
18-29	268.0	NA	3.7	NA	NA	NA
18-29	278.0	NA	NA	3.9	NA	NA
18-29	246.0	NA	NA	NA	3.4	NA
18-29	72.0	NA	NA	NA	NA	1.0
30-39	10.0	0.1	NA	NA	NA	NA
30-39	502.0	NA	7.0	NA	NA	NA
30-39	378.0	NA	NA	5.3	NA	NA
30-39	281.0	NA	NA	NA	3.9	NA
30-39	68.0	NA	NA	NA	NA	1.0
40-49	8.0	0.1	NA	NA	NA	NA
40-49	476.0	NA	6.7	NA	NA	NA
40-49	314.0	NA	NA	4.4	NA	NA
40-49	247.0	NA	NA	NA	3.5	NA
40-49	35.0	NA	NA	NA	NA	0.5
50-59	2.0	0.0	NA	NA	NA	NA
50-59	586.0	NA	8.2	NA	NA	NA
50-59	325.0	NA	NA	4.5	NA	NA
50-59	249.0	NA	NA	NA	3.5	NA
50-59	37.0	NA	NA	NA	NA	0.5
60-69	10.0	0.1	NA	NA	NA	NA
60-69	752.0	NA	10.5	NA	NA	NA
60-69	362.0	NA	NA	5.1	NA	NA
60-69	284.0	NA	NA	NA	4.0	NA
60-69	27.0	NA	NA	NA	NA	0.4
70 or older	8.0	0.1	NA	NA	NA	NA
70 or older	787.0	NA	11.0	NA	NA	NA
70 or older	287.0	NA	NA	4.0	NA	NA
70 or older	230.0	NA	NA	NA	3.2	NA
70 or older	17.0	NA	NA	NA	NA	0.2
Data: ANES 2020

library(ggplot2)

ggplot(trust_distribution, aes(x = AgeGroup, y = percentage, fill = TrustPeople)) +
  geom_bar(stat = "identity", position = "fill") + 
  scale_fill_viridis_d(option = "mako") + 
  theme_minimal() + 
  theme(
    legend.position = "right",
    legend.title = element_blank(),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    axis.title.y = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  ) +
  labs(
    title = "Interpersonal Trust Distribution by Age Group",
    subtitle = "Proportion of responses in each category",
    x = "Age Group",
    y = "Proportion",
    caption = "Data: ANES 2020"
  ) +
  coord_flip()  # Voltea las barras para mejor lectura

The stacked bar chart is built using geom_bar(stat = “identity”, position = “fill”). This setting is key because it allows us to represent the distribution of responses as proportions within each age group rather than counting absolute responses. By using position = “fill”, we ensure that each bar has the same overall height (100%), making it easier to visually compare proportions across different categories. It is also good to know that if you use coord_flip(), it swaps the X and Y axes, turning the graph into a horizontal bar format. This choice is not merely aesthetic: by flipping the bars, we make it easier to read the labels of each age group and avoid overlapping text, making the graph clearer and more understandable.

Interpretation Interpersonal Trust Patterns:

The data show that interpersonal trust varies significantly across age groups. Generally speaking, older adults tend to express higher trust in people compared to younger groups, who are more skeptical. The distribution of responses also suggests that the perception of trust could be influenced by life experiences, social stability, and generation. In particular, the younger group presents the highest distrust, which could be related to the current social context and exposure to global crisis events. In summary, age seems to be a relevant factor in how people perceive the trustworthiness of others, and these data provide clear evidence of a generational pattern in interpersonal trust.

Task 3: Visualizations

library(tidyverse)
library(fst)  # Para leer archivos .fst

denmark_data <- read_fst("denmark_data.fst")
italy_data <- read_fst("italy_data.fst")

clean_denmark <- denmark_data %>%
  filter(!sofrdst %in% c("Refusal", "DK", "NA"))

clean_italy <- italy_data %>%
  filter(!sofrdst %in% c("Refusal", "DK", "NA"))

denmark_distribution <- clean_denmark %>%
  group_by(sofrdst) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = (count / sum(count)) * 100)

italy_distribution <- clean_italy %>%
  group_by(sofrdst) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = (count / sum(count)) * 100)

# Combinar ambos datasets para facilitar la comparación, similar a lo que paso con el grupo de continentes al inicio
fairness_distribution <- bind_rows(
  denmark_distribution %>% mutate(country = "Denmark"),
  italy_distribution %>% mutate(country = "Italy")
)

Combine both datasets to facilitate comparison, similar to what happened with the continent group at the beginning

library(gt)
fairness_table <- fairness_distribution %>%
  pivot_wider(names_from = sofrdst, values_from = percentage) %>%
  gt() %>%
  cols_label(
    country = "Country"
  ) %>%
  fmt_number(
    columns = -country,
    decimals = 1
  ) %>%
  tab_header(
    title = md("**Views on Fair Income Distribution**"),
    subtitle = md("**Response distribution by country (%)**")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_column_labels()
  ) %>%
  tab_source_note(
    source_note = md("**Data: ESS (Denmark & Italy)**")
  )

fairness_table

count	Country	1	2	3	4	5	7	8	9	NA
Views on Fair Income Distribution
Response distribution by country (%)
79.0	Denmark	0.6	NA	NA	NA	NA	NA	NA	NA	NA
268.0	Denmark	NA	2.2	NA	NA	NA	NA	NA	NA	NA
325.0	Denmark	NA	NA	2.6	NA	NA	NA	NA	NA	NA
674.0	Denmark	NA	NA	NA	5.4	NA	NA	NA	NA	NA
202.0	Denmark	NA	NA	NA	NA	1.6	NA	NA	NA	NA
4.0	Denmark	NA	NA	NA	NA	NA	0.0	NA	0.0	NA
16.0	Denmark	NA	NA	NA	NA	NA	NA	0.1	NA	NA
10,836.0	Denmark	NA	NA	NA	NA	NA	NA	NA	NA	87.3
692.0	Italy	6.8	NA	NA	NA	NA	NA	NA	NA	NA
1,346.0	Italy	NA	13.2	NA	NA	NA	NA	NA	NA	NA
448.0	Italy	NA	NA	4.4	NA	NA	NA	NA	NA	NA
174.0	Italy	NA	NA	NA	1.7	NA	NA	NA	NA	NA
31.0	Italy	NA	NA	NA	NA	0.3	NA	NA	NA	NA
9.0	Italy	NA	NA	NA	NA	NA	0.1	NA	NA	NA
45.0	Italy	NA	NA	NA	NA	NA	NA	0.4	NA	NA
7,433.0	Italy	NA	NA	NA	NA	NA	NA	NA	NA	73.0
Data: ESS (Denmark & Italy)

Visualizing Response Distribution: “I hated this part”

library(ggplot2)

ggplot(fairness_distribution, aes(x = sofrdst, y = percentage, fill = country)) +
  geom_bar(stat = "identity", position = "dodge") +  
  scale_fill_brewer(palette = "Set1") +  
  theme_minimal() +  
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    axis.title.x = element_text(face = "bold")
  ) +
  labs(
    title = "Distribution of Views on Income Equality",
    subtitle = "Comparison between Italy and Denmark",
    x = "Response Category",
    y = "Percentage",
    caption = "Data: ESS"
  )

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

education_levels <- function(eisced) {
  case_when(
    eisced %in% c(0, 1, 2) ~ "Basic",       # Educación primaria y secundaria baja
    eisced %in% c(3, 4) ~ "Intermediate",   # Secundaria alta y post-secundaria
    eisced %in% c(5, 6) ~ "Higher",         # Universitario y superior
    TRUE ~ "Other"
  )
}

clean_denmark <- clean_denmark %>% mutate(education_group = education_levels(eisced))
clean_italy <- clean_italy %>% mutate(education_group = education_levels(eisced))

education_distribution <- bind_rows(
  clean_denmark %>%
    group_by(education_group, sofrdst) %>%
    summarise(count = n(), .groups = "drop") %>%
    mutate(percentage = (count / sum(count)) * 100) %>%
    mutate(country = "Denmark"),
  
  clean_italy %>%
    group_by(education_group, sofrdst) %>%
    summarise(count = n(), .groups = "drop") %>%
    mutate(percentage = (count / sum(count)) * 100) %>%
    mutate(country = "Italy")
)

ggplot(education_distribution, aes(x = sofrdst, y = percentage, fill = education_group)) +
  geom_bar(stat = "identity", position = "dodge") +  # Barras agrupadas para comparar
  scale_fill_brewer(palette = "Set1") +  # Colores diferenciados
  facet_wrap(~country) +  # Un gráfico por país
  theme_minimal() +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 12),
    axis.title.x = element_text(face = "bold")
  ) +
  labs(
    title = "Views on Income Distribution by Education Level",
    subtitle = "Comparing Italy and Denmark",
    x = "Response Category",
    y = "Percentage",
    caption = "Data: ESS"
  )

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_bar()`).

At the beginning I made a mistake, which is that the variable that I was trying to use, education, does not exist with that name in the datasets for Italy and Denmark. To solve this, it was necessary to verify what the correct name of the variable representing the educational level is
I had to run the colnames code in RStudio to list the column names in each dataset. Find the column that corresponds to the educational level (it can be called something like education_level, edu, schooling, etc.). And Based on the names, the most appropriate variable to represent the educational level in the European Social Survey (ESS) is usually eisced, which corresponds to the International Standard Classification of Education (ISCED). This variable categorizes the educational level in a standard format.
After solving this, I was able to create a faceted graph to compare the distribution of opinions on fairness in income distribution in Italy and Denmark, segmented by educational level.

"The process crashed the computer about three times"

Interpretation of Results

Differences between countries:
Denmark shows a more homogeneous tendency towards equity in income distribution, with a higher percentage of people believing that income should be fairer. Italy, in contrast, has a more fragmented distribution, with more divided opinions on whether the distribution is fair or not.

Educational patterns:
The data suggest that as the level of education increases, the perception of income equity changes. In both countries, people with higher education tend to be more supportive of the idea of a more equitable distribution, while those with basic education have more dispersed responses.

Education appears to be a key factor in the perception of economic justice. However, cultural differences between Italy and Denmark also influence how income equity is perceived.

Skill-Building Exercise 1

Luis Rodriguez Banderas

2025-02-06

Task 1: Global Life Expectancy Changes

Task 2: Interpersonal Trust Patterns

Task 3: Visualizations