The goal is to analyze global trends in life expectancy and GDP using the Gapminder dataset.
Here, we load the libraries needed to manipulate data, create visualizations, and work with geographic data for mapping.
library(dplyr)
library(ggplot2)
library(gapminder)
library(ggrepel)
library(rnaturalearth)
library(rnaturalearthdata)
# Load Data ----
# Use the gapminder object made available by library(gapminder).
data <- gapminder
# Collect the column (variable) names of the dataset and show them.
variables <- names(data)
print(variables)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# Load necessary libraries
library(dplyr)
# Count missing values (NA) in every column of the dataset.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# named integer vector with one count per column.
null_values <- vapply(data, function(column) sum(is.na(column)), integer(1))
print(null_values)
## country continent year lifeExp pop gdpPercap
## 0 0 0 0 0 0
# Count the distinct countries present in the dataset and report the total.
num_countries <- length(unique(data$country))
print(paste("Number of countries with data:", num_countries))
## [1] "Number of countries with data: 142"
# Find how many years of data are present for each country.
# n_distinct(year) is used rather than n() so the figure counts years covered,
# not raw rows, should a country-year combination ever appear more than once.
years_per_country <- data %>%
  group_by(country) %>%
  summarise(num_years = n_distinct(year)) %>%
  arrange(desc(num_years))
# Display the number of years data is available for each country
print(years_per_country)
## # A tibble: 142 × 2
## country num_years
## <fct> <int>
## 1 Afghanistan 12
## 2 Albania 12
## 3 Algeria 12
## 4 Angola 12
## 5 Argentina 12
## 6 Australia 12
## 7 Austria 12
## 8 Bahrain 12
## 9 Bangladesh 12
## 10 Belgium 12
## # ℹ 132 more rows
This chunk filters the data for the year 2007 and groups it by continent. It calculates key metrics for comparative analysis and outputs the summarized results.
# Summarise the 2007 rows by continent: mean life expectancy, mean GDP per
# capita and total population.
continent_summary <- data %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    avg_lifeExp = mean(lifeExp),
    avg_gdpPercap = mean(gdpPercap),
    # Sum as double: continent totals can exceed the 32-bit integer limit
    # (about 2.1e9), which would turn the sum into NA if pop is stored as
    # integer.
    total_population = sum(as.numeric(pop))
  )
# Display the summary
print(continent_summary)
## # A tibble: 5 × 4
## continent avg_lifeExp avg_gdpPercap total_population
## <fct> <dbl> <dbl> <dbl>
## 1 Africa 54.8 3089. 929539692
## 2 Americas 73.6 11003. 898871184
## 3 Asia 70.7 12473. 3811953827
## 4 Europe 77.6 25054. 586098529
## 5 Oceania 80.7 29810. 24549947
This section analyzes how population, life expectancy, and GDP per capita have changed over time for different continents.
# Population
# Total population per continent and year, drawn as one line per continent.
ggplot(data, aes(x = year, y = pop, group = continent, color = continent)) +
  geom_line(
    stat = "summary",
    # Sum as double so continent totals cannot overflow the 32-bit integer
    # range (which would yield NA) if pop is stored as integer.
    fun = function(pop_values) sum(as.numeric(pop_values))
  ) +
  labs(
    title = "Population Over Time by Continent",
    x = "Year",
    y = "Total Population"
  ) +
  theme_minimal()
# Life Expectancy
# Mean life expectancy per continent and year, drawn as one line per continent.
data %>%
  ggplot(aes(x = year, y = lifeExp, group = continent, color = continent)) +
  stat_summary(geom = "line", fun = "mean") +
  ggtitle("Average Life Expectancy Over Time by Continent") +
  xlab("Year") +
  ylab("Life Expectancy") +
  theme_minimal()
# GDP Per Capita
# Mean GDP per capita per continent and year, drawn as one line per continent.
data %>%
  ggplot(aes(x = year, y = gdpPercap, group = continent, color = continent)) +
  stat_summary(geom = "line", fun = "mean") +
  theme_minimal() +
  labs(
    y = "GDP Per Capita",
    x = "Year",
    title = "Average GDP Per Capita Growth Over Time by Continent"
  )
This section analyzes how population, life expectancy, and GDP per capita have changed over time for a subset of countries (India, China, USA).
# Focus on a few countries to avoid clutter.
selected_countries <- c("India", "China", "United States")
# Filter once and reuse the subset for all three plots below, instead of
# repeating the same filter inside every ggplot() call.
selected_data <- data %>%
  filter(country %in% selected_countries)

# Population: one line per selected country.
ggplot(
  selected_data,
  aes(x = year, y = pop, group = country, color = country)
) +
  geom_line() +
  labs(
    title = "Population Over Time for Selected Countries",
    x = "Year",
    y = "Population"
  ) +
  theme_minimal()

# Life expectancy: one line per selected country.
ggplot(
  selected_data,
  aes(x = year, y = lifeExp, group = country, color = country)
) +
  geom_line() +
  labs(
    title = "Life Expectancy Over Time for Selected Countries",
    x = "Year",
    y = "Life Expectancy"
  ) +
  theme_minimal()

# GDP per capita: one line per selected country. The grouping is stated
# explicitly so this plot matches the two above.
ggplot(
  selected_data,
  aes(x = year, y = gdpPercap, group = country, color = country)
) +
  geom_line() +
  labs(
    title = "GDP Per Capita Growth Over Time for Selected Countries",
    x = "Year",
    y = "GDP Per Capita"
  ) +
  theme_minimal()
Identify the top 10 countries with the highest life expectancy in 2007.
# Keep the 2007 rows, sort by life expectancy (highest first) and take the
# first ten.
top_10_lifeExp_2007 <- data %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp)) %>%
  head(10)
# Shared left end for the stems: the largest whole number at or below the
# smallest life expectancy shown.
lifeExp_baseline <- floor(min(top_10_lifeExp_2007$lifeExp))
# Lollipop-style dot plot: a gray stem from the baseline to each country's
# value, with the point drawn on top. The stem needs its own x start and the
# same reordered y as the points; inheriting x = lifeExp with xend = lifeExp
# would give every stem zero length.
ggplot(top_10_lifeExp_2007, aes(x = lifeExp, y = reorder(country, lifeExp))) +
  geom_segment(
    aes(
      x = lifeExp_baseline,
      xend = lifeExp,
      yend = reorder(country, lifeExp)
    ),
    color = "gray"
  ) +
  geom_point(size = 4, color = "blue") +
  labs(
    title = "Top 10 Countries by Life Expectancy (2007)",
    x = "Life Expectancy",
    y = "Country"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )
Identify the top 10 countries with the highest population in 2007.
# Keep the 2007 rows, order them from most to least populous and retain the
# first ten.
rows_2007_by_pop <- arrange(filter(data, year == 2007), desc(pop))
top_10_pop_2007 <- head(rows_2007_by_pop, n = 10)
# Horizontal bar chart of those ten countries, ordered by population.
top_10_pop_2007 %>%
  ggplot(aes(x = reorder(country, pop), y = pop)) +
  geom_col(fill = "tomato") +
  coord_flip() +
  ggtitle("Top 10 Countries by Population (2007)") +
  xlab("Country") +
  ylab("Population") +
  theme_minimal()
World map of all countries and their GDP per capita in 2007.
# Load the world map data (country shapes)
world_map <- ne_countries(scale = "medium", returnclass = "sf")
# Prepare data: 2007 GDP per capita for every country, with the ISO alpha-3
# code looked up from gapminder's country_codes table. Despite its name,
# top_10_gdp_2007 holds all countries; the name is kept so any other code
# referring to it keeps working.
top_10_gdp_2007 <- data %>%
  filter(year == 2007) %>%
  mutate(country = as.character(country)) %>%
  left_join(country_codes, by = "country") %>%
  select(country, iso_alpha, gdpPercap)
# Merge the world map with the GDP data on country codes rather than names.
# Free-text names are spelled differently in the two sources, so a name join
# silently leaves some countries without a value.
# NOTE(review): assumes the map's adm0_a3 column uses the same three-letter
# codes as iso_alpha for the countries of interest -- check the message below.
world_map_gdp <- world_map %>%
  left_join(
    select(top_10_gdp_2007, iso_alpha, gdpPercap),
    by = c("adm0_a3" = "iso_alpha")
  )
# Report any gapminder country that found no polygon instead of failing
# silently.
unmatched_codes <- setdiff(top_10_gdp_2007$iso_alpha, world_map$adm0_a3)
if (length(unmatched_codes) > 0) {
  message(
    "No map polygon matched these country codes: ",
    paste(unmatched_codes, collapse = ", ")
  )
}
# Plot the map
ggplot(world_map_gdp) +
  # Fill each country with its GDP per capita
  geom_sf(aes(fill = gdpPercap), color = "white", size = 0.1) +
  # Base-10 log scale: same colour gradient as a natural-log scale, but the
  # legend breaks fall on powers of ten
  scale_fill_viridis_c(option = "viridis", trans = "log10") +
  labs(
    title = "World Map of GDP per Capita (2007)",
    fill = "GDP per Capita"
  ) +
  theme_minimal()
This chunk creates a scatter plot to explore the correlation between GDP per capita and life expectancy. Continents are color-coded to highlight regional patterns, and a log scale is applied to GDP for better visual distribution.
# Scatter of life expectancy against GDP per capita for every row of data,
# coloured by continent; the x-axis uses a log10 scale.
data %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.7) +
  scale_x_log10() +
  theme_minimal() +
  ggtitle("Life Expectancy vs. GDP Per Capita") +
  xlab("GDP per Capita (Log Scale)") +
  ylab("Life Expectancy")
This chunk creates a scatter plot to analyze the relationship between population size and life expectancy. Continents are color-coded to reveal regional differences, and a log scale is applied to population for better distribution.
# Scatter of life expectancy against population for every row of data.
# Points are semi-transparent and coloured by continent; the population axis
# uses a log10 scale.
data %>%
  ggplot(aes(x = pop, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.7) +
  scale_x_log10() +
  ggtitle("Life Expectancy vs. Population") +
  xlab("Population (Log Scale)") +
  ylab("Life Expectancy") +
  theme_minimal()
This chunk creates a scatter plot to examine how population size correlates with GDP per capita. Continents are color-coded, and log scales are applied to both axes to better capture variations across countries.
# Scatter of GDP per capita against population for every row of data.
# Both axes use a log10 scale; points are coloured by continent.
data %>%
  ggplot(aes(x = pop, y = gdpPercap, color = continent)) +
  geom_point(alpha = 0.7) +
  scale_y_log10() +
  scale_x_log10() +
  theme_minimal() +
  labs(
    y = "GDP Per Capita (Log Scale)",
    x = "Population (Log Scale)",
    title = "Population vs. GDP Per Capita"
  )
This chunk generates a scatter plot to study how GDP per capita correlates with life expectancy across continents. Separate faceted subplots for each continent allow for a detailed comparison of regional patterns.
# Life expectancy against GDP per capita (log10 x-axis), split into one panel
# per continent and coloured by continent.
data %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.7) +
  facet_wrap(vars(continent)) +
  scale_x_log10() +
  ggtitle("Life Expectancy vs GDP per Capita by Continent") +
  xlab("GDP per Capita (Log Scale)") +
  ylab("Life Expectancy") +
  theme_minimal()
This chunk creates a scatter plot to explore how population size correlates with life expectancy across continents. Faceted subplots for each continent help highlight distinct regional trends and patterns.
# Life expectancy against population (log10 x-axis), with a separate panel
# for each continent and points coloured by continent.
data %>%
  ggplot(aes(x = pop, y = lifeExp, color = continent)) +
  geom_point(alpha = 0.7) +
  facet_wrap(vars(continent)) +
  scale_x_log10() +
  theme_minimal() +
  ggtitle("Life Expectancy vs Population by Continent") +
  xlab("Population (Log Scale)") +
  ylab("Life Expectancy")
This chunk generates a scatter plot to analyze the correlation between population size and GDP per capita across continents. Log scales are applied to both axes, and faceted subplots provide a granular view of regional differences.
# GDP per capita against population, both on log10 axes, with a separate
# panel for each continent and points coloured by continent.
data %>%
  ggplot(aes(x = pop, y = gdpPercap, color = continent)) +
  geom_point(alpha = 0.7) +
  facet_wrap(vars(continent)) +
  scale_y_log10() +
  scale_x_log10() +
  theme_minimal() +
  labs(
    y = "GDP per Capita (Log Scale)",
    x = "Population (Log Scale)",
    title = "GDP per Capita vs Population by Continent"
  )
This chunk creates a boxplot to compare the distribution of life expectancy among continents. It highlights variations in median values, interquartile ranges, and potential outliers within each continent.
# One box per continent showing the distribution of life expectancy; each box
# is filled with its continent's colour.
data %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot() +
  theme_minimal() +
  ggtitle("Life Expectancy Distribution by Continent") +
  xlab("Continent") +
  ylab("Life Expectancy")
Population: Asia leads global population growth, with India and China dominating. Africa shows high populations with economic challenges.
Life Expectancy: Continents like Oceania and Europe have the highest life expectancy, while Africa lags but shows improvement.
GDP Per Capita: Europe and Oceania are economically dominant, while Africa remains the lowest despite gradual growth.
Relationships:
Top Performers (2007): Japan leads in life expectancy, while China and India dominate population figures.
Distribution Trends: Africa has the widest variability in life expectancy, while Oceania and Europe show stability.
Global improvements in life expectancy and GDP per capita are evident, but significant regional disparities persist, particularly in Africa.