# Structure of the gapminder data set ----
# Lines starting with "##" are the echoed results of the call above them.

# Column names: country, continent, and year identify an observation; every
# other column is a statistic about the country.
colnames(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
ncol(gapminder)
## [1] 6
# 6 columns
nrow(gapminder)
## [1] 1704
# Each row is a different year for a country
# 1704 rows

# Missing values ----
# Total number of NA cells in the whole data frame.
sum(is.na(gapminder))
## [1] 0
# Number of NA cells per column.
colSums(is.na(gapminder))
##   country continent      year   lifeExp       pop gdpPercap 
##         0         0         0         0         0         0
# Single yes/no answer. (is.null() is not suitable for this: it tests whether
# the object itself is NULL and returns one FALSE for any data frame, so it
# cannot detect missing cells.)
anyNA(gapminder)
## [1] FALSE
# 0 missing values

Which region's average life expectancy is closest to the world average?

# World reference value: mean of lifeExp over every row of gapminder.
average_life_expectancy_world <- mean(gapminder[["lifeExp"]])

# Mean of lifeExp within each continent (one output row per continent).
avg_life_expectancy_by_region <- summarise(
  group_by(gapminder, continent),
  avg_life_expectancy = mean(lifeExp)
)

# One bar per continent, labelled with its mean rounded to two decimals.
# The dashed red line marks the world mean, so the bar whose top sits nearest
# to the line is the continent closest to the world average.
ggplot(
  avg_life_expectancy_by_region,
  aes(x = continent, y = avg_life_expectancy)
) +
  geom_col(fill = "lightblue") +
  geom_hline(
    yintercept = average_life_expectancy_world,
    linetype = "dashed",
    color = "red"
  ) +
  geom_text(
    aes(label = round(avg_life_expectancy, 2)),
    vjust = -0.5
  ) +
  labs(
    x = "Region",
    y = "Average Life Expectancy",
    title = "Average Life Expectancy by Region"
  ) +
  # theme() must follow theme_minimal(), which would otherwise reset it.
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

How has GDP per capita by region changed from 2002 to 2007?

# Average GDP per capita by continent, 2002 vs. 2007 ----

# Rows for each of the two years being compared.
gapminder_2002 <- gapminder %>%
  filter(year == 2002)

gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# Unweighted mean of gdpPercap over the rows of each continent.
avg_gdp_by_region_2002 <- gapminder_2002 %>%
  group_by(continent) %>%
  summarise(avg_gdpPercap_2002 = mean(gdpPercap))

avg_gdp_by_region_2007 <- gapminder_2007 %>%
  group_by(continent) %>%
  summarise(avg_gdpPercap_2007 = mean(gdpPercap))

# One row per continent with both yearly averages side by side; all = TRUE
# keeps a continent even if it is present in only one of the two years.
avg_gdp_combined <- merge(
  avg_gdp_by_region_2002,
  avg_gdp_by_region_2007,
  by = "continent",
  all = TRUE
)

# Long form for plotting: "year" holds the name of the source column
# ("avg_gdpPercap_2002" or "avg_gdpPercap_2007").
avg_gdp_combined <- avg_gdp_combined %>%
  pivot_longer(
    cols = c(avg_gdpPercap_2002, avg_gdpPercap_2007),
    names_to = "year",
    values_to = "avg_gdpPercap"
  )

# Dodged bars, one pair per continent, each labelled with its rounded value.
ggplot(
  data = avg_gdp_combined,
  aes(x = continent, y = avg_gdpPercap, fill = year)
) +
  geom_col(position = position_dodge(width = 0.8), width = 0.8) +
  geom_text(
    aes(label = round(avg_gdpPercap, 0)),
    vjust = -0.5,
    size = 3,
    color = "black",
    position = position_dodge(width = 0.8)
  ) +
  labs(
    x = "Continent",
    y = "Average GDP per Capita",
    title = "Average GDP per Capita by Region (2002 vs. 2007)",
    fill = "Year"
  ) +
  # Colours are matched to levels by name rather than by position, and the
  # legend shows plain years instead of the raw column names.
  scale_fill_manual(
    values = c(
      avg_gdpPercap_2002 = "lightblue",
      avg_gdpPercap_2007 = "lightgreen"
    ),
    breaks = c("avg_gdpPercap_2002", "avg_gdpPercap_2007"),
    labels = c("2002", "2007")
  ) +
  scale_y_continuous(labels = scales::comma) +
  # theme() must follow theme_minimal(), which would otherwise reset it.
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

How has total population grown in each region from 1952 to 2007? Which region had the most growth?

# Total population by continent, 1952 vs. 2007 ----

# Rows for the first and last year being compared.
gapminder_1952 <- gapminder %>%
  filter(year == 1952)

gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# Continental totals. as.numeric() makes sum() work in double precision: if
# pop is stored as integer (as it is in the gapminder package), a total above
# .Machine$integer.max (about 2.1 billion) would otherwise come back as NA
# with an "integer overflow" warning.
total_population_1952 <- gapminder_1952 %>%
  group_by(continent) %>%
  summarise(total_pop_1952 = sum(as.numeric(pop)))

total_population_2007 <- gapminder_2007 %>%
  group_by(continent) %>%
  summarise(total_pop_2007 = sum(as.numeric(pop)))

# One row per continent with both totals side by side; all = TRUE keeps a
# continent even if it is present in only one of the two years.
population_combined <- merge(
  total_population_1952,
  total_population_2007,
  by = "continent",
  all = TRUE
)

# Percentage growth from 1952 to 2007, relative to the 1952 total.
population_combined$growth_rate <- with(
  population_combined,
  (total_pop_2007 - total_pop_1952) / total_pop_1952 * 100
)

# Continent with the largest percentage growth.
population_combined[
  which.max(population_combined$growth_rate),
  c("continent", "growth_rate")
]

# Long form for plotting: "year" holds the name of the source column
# ("total_pop_1952" or "total_pop_2007"); growth_rate is carried along.
population_combined_long <- tidyr::pivot_longer(
  population_combined,
  cols = c(total_pop_1952, total_pop_2007),
  names_to = "year",
  values_to = "total_population"
)

# Vertical anchor for the bar labels: 1952 labels are lifted by 5% of their
# own bar height, 2007 labels sit at the bar top. No grouping is needed
# because each continent/year pair is a single row.
population_combined_long <- population_combined_long %>%
  mutate(
    label_y = ifelse(
      year == "total_pop_1952",
      total_population * 1.05,
      total_population
    )
  )

# Dodged bars, one pair per continent, each labelled with its total.
ggplot(
  data = population_combined_long,
  aes(x = continent, y = total_population, fill = year)
) +
  geom_col(position = position_dodge(width = 0.8), width = 0.6) +
  geom_text(
    # Thousands separators keep the large totals readable and match the axis.
    aes(label = scales::comma(total_population), y = label_y),
    vjust = -0.5,
    size = 3,
    color = "black",
    position = position_dodge(width = 0.8)
  ) +
  labs(
    x = "Continent",
    y = "Total Population",
    title = "Total Population by Region (1952 vs. 2007)",
    fill = "Year"
  ) +
  # Colours are matched to levels by name rather than by position, and the
  # legend shows plain years instead of the raw column names.
  scale_fill_manual(
    values = c(total_pop_1952 = "lightblue", total_pop_2007 = "lightgreen"),
    breaks = c("total_pop_1952", "total_pop_2007"),
    labels = c("1952", "2007")
  ) +
  scale_y_continuous(labels = scales::comma) +
  # theme() must follow theme_minimal(), which would otherwise reset it.
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )