Problem Definition

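This analysis investigates how life expectancy varies across continents over time using the Gapminder dataset. The steps below start from the 2007 snapshot: they summarise average life expectancy and GDP per capita by continent, then plot the relationship between GDP per capita and life expectancy.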

Data Wrangling

# Load necessary libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(gapminder)
## Warning: package 'gapminder' was built under R version 4.4.2
# Step 1: restrict the Gapminder data to the 2007 observations
data_2007 <- filter(gapminder, year == 2007)

# Step 2: keep only the columns used in the rest of the analysis
selected_data <- select(
  data_2007,
  country, continent, lifeExp, gdpPercap, pop
)

# Step 3: derive total GDP (GDP per capita times population), in billions
mutated_data <- mutate(
  selected_data,
  gdp_in_billions = gdpPercap * pop / 1e9
)

# Step 4: build the per-continent summary. Both averages are plain
# (unweighted) means over the rows of each continent, and the continents are
# listed from the highest to the lowest average life expectancy.
summary_table <- arrange(
  summarise(
    group_by(mutated_data, continent),
    avg_life_expectancy = mean(lifeExp, na.rm = TRUE),
    avg_gdp_per_capita = mean(gdpPercap, na.rm = TRUE)
  ),
  desc(avg_life_expectancy)
)

# Display the summary table
print(summary_table)
## # A tibble: 5 × 3
##   continent avg_life_expectancy avg_gdp_per_capita
##   <fct>                   <dbl>              <dbl>
## 1 Oceania                  80.7             29810.
## 2 Europe                   77.6             25054.
## 3 Americas                 73.6             11003.
## 4 Asia                     70.7             12473.
## 5 Africa                   54.8              3089.
# Scatter plot of GDP per capita (log10 x-axis) against life expectancy,
# drawn from mutated_data with one point per row, coloured by continent
scatter_plot <- ggplot(
  data = mutated_data,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  # Semi-transparent points so overlapping observations stay visible
  geom_point(size = 3, alpha = 0.7) +
  scale_x_log10() +
  theme_minimal() +
  labs(
    title = "Relationship Between GDP Per Capita and Life Expectancy (2007)",
    x = "GDP Per Capita (Log Scale)",
    y = "Life Expectancy",
    color = "Continent"
  )

# Render the plot
print(scatter_plot)