# Structure of the data: column names, column count, row count.
colnames(gapminder)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
ncol(gapminder)
## [1] 6
# 6 columns: country, continent and year identify an observation;
# lifeExp, pop and gdpPercap are the statistics recorded for it.
nrow(gapminder)
## [1] 1704
# 1704 rows, one per country per year.

# Missing values: total count of NA cells, then the count per column.
sum(is.na(gapminder))
## [1] 0
colSums(is.na(gapminder))
## country continent year lifeExp pop gdpPercap
## 0 0 0 0 0 0
# Whole-table confirmation. is.null() is not usable for this: it tests whether
# the object itself is NULL and returns a single FALSE for any data frame, so
# summing it yields 0 regardless of the contents. anyNA() inspects the cells.
anyNA(gapminder)
## [1] FALSE
# 0 missing values
Which region's average life expectancy is closest to the world average?
# World benchmark: mean of lifeExp over every row of the data
# (unweighted, so each country-year observation counts equally).
average_life_expectancy_world <- mean(gapminder[["lifeExp"]])

# Mean of lifeExp within each continent, computed over the same rows.
avg_life_expectancy_by_region <- summarise(
  group_by(gapminder, continent),
  avg_life_expectancy = mean(lifeExp)
)

# One bar per continent, labelled with its rounded mean; the dashed red line
# marks the world benchmark so the closest region can be read off the chart.
ggplot(
  avg_life_expectancy_by_region,
  aes(x = continent, y = avg_life_expectancy)
) +
  geom_col(fill = "lightblue") +
  geom_hline(
    yintercept = average_life_expectancy_world,
    linetype = "dashed",
    color = "red"
  ) +
  geom_text(aes(label = round(avg_life_expectancy, 2)), vjust = -0.5) +
  labs(
    title = "Average Life Expectancy by Region",
    x = "Region",
    y = "Average Life Expectancy"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
How did GDP per capita in each region change between 2002 and 2007?
# Rows for the two years being compared.
gapminder_2002 <- filter(gapminder, year == 2002)
gapminder_2007 <- filter(gapminder, year == 2007)

# Mean gdpPercap per continent for each year (unweighted across the rows
# of that continent).
avg_gdp_by_region_2002 <- summarise(
  group_by(gapminder_2002, continent),
  avg_gdpPercap_2002 = mean(gdpPercap)
)
avg_gdp_by_region_2007 <- summarise(
  group_by(gapminder_2007, continent),
  avg_gdpPercap_2007 = mean(gdpPercap)
)

# Join the two summaries on continent (keeping continents present in either),
# then stack the two value columns so `year` can drive the bar fill.
avg_gdp_combined <- merge(
  avg_gdp_by_region_2002,
  avg_gdp_by_region_2007,
  by = "continent",
  all = TRUE
)
avg_gdp_combined <- pivot_longer(
  avg_gdp_combined,
  cols = c(avg_gdpPercap_2002, avg_gdpPercap_2007),
  names_to = "year",
  values_to = "avg_gdpPercap"
)

# Side-by-side bars per continent, one per year, each labelled with its
# rounded value. Text uses the same dodge width as the bars so labels line up.
ggplot(
  data = avg_gdp_combined,
  aes(x = continent, y = avg_gdpPercap, fill = year)
) +
  geom_col(position = position_dodge(width = 0.8), width = 0.8) +
  geom_text(
    aes(label = round(avg_gdpPercap, 0)),
    position = position_dodge(width = 0.8),
    vjust = -0.5,
    size = 3,
    color = "black"
  ) +
  scale_fill_manual(values = c("lightblue", "lightgreen")) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Average GDP per Capita by Region (2002 vs. 2007)",
    x = "Continent",
    y = "Average GDP per Capita"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )
How has total population grown in each region from 1952 to 2007? Which region had the most growth?
# Total population per continent in 1952 and 2007, the percentage growth
# between the two years, and a dodged bar chart comparing the totals.
gapminder_1952 <- gapminder %>%
  filter(year == 1952)
gapminder_2007 <- gapminder %>%
  filter(year == 2007)

# Sum as double rather than integer. NOTE(review): `pop` is an integer column
# in the gapminder package's data (confirm with str(gapminder)); sum() over
# integers returns NA with a warning once a total exceeds
# .Machine$integer.max (about 2.15 billion), which continent-level totals can.
total_population_1952 <- gapminder_1952 %>%
  group_by(continent) %>%
  summarise(total_pop_1952 = sum(as.numeric(pop)))
total_population_2007 <- gapminder_2007 %>%
  group_by(continent) %>%
  summarise(total_pop_2007 = sum(as.numeric(pop)))

# One row per continent with both totals and the percentage growth.
population_combined <- merge(
  total_population_1952,
  total_population_2007,
  by = "continent",
  all = TRUE
)
population_combined$growth_rate <-
  (population_combined$total_pop_2007 - population_combined$total_pop_1952) /
  population_combined$total_pop_1952 * 100

# Region with the largest percentage growth from 1952 to 2007.
population_combined[
  which.max(population_combined$growth_rate),
  c("continent", "growth_rate")
]

# Long format for plotting: one row per continent and year.
# label_y places the 1952 labels 5% above their bar top; 2007 labels sit at the
# bar top. (The row's own value is used for the offset, so no grouping is
# needed and the result is left ungrouped.)
population_combined_long <- population_combined %>%
  tidyr::pivot_longer(
    cols = c(total_pop_1952, total_pop_2007),
    names_to = "year",
    values_to = "total_population"
  ) %>%
  mutate(
    label_y = ifelse(
      year == "total_pop_1952",
      total_population * 1.05,
      total_population
    )
  )

# Side-by-side bars per continent; labels use the same dodge width as the bars
# and are comma-formatted to match the y axis.
ggplot(
  data = population_combined_long,
  aes(x = continent, y = total_population, fill = year)
) +
  geom_col(position = position_dodge(width = 0.8), width = 0.6) +
  geom_text(
    aes(label = scales::comma(total_population), y = label_y),
    vjust = -0.5,
    size = 3,
    color = "black",
    position = position_dodge(width = 0.8)
  ) +
  labs(
    x = "Continent",
    y = "Total Population",
    title = "Total Population by Region (1952 vs. 2007)"
  ) +
  scale_fill_manual(values = c("lightblue", "lightgreen")) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )