The goal of this analysis is to study economic inequality across continents using the Gapminder dataset. We will employ metrics like the Gini coefficient and percentile comparisons to understand disparities in GDP per capita.
We filter the Gapminder dataset for the year 2007 and select only the necessary columns: `country`, `year`, `gdpPercap`, and `continent`.
# Keep only the 2007 observations that have a GDP-per-capita value, then
# narrow the table to the four columns used by the rest of the analysis.
filtered_data <- gapminder %>%
  filter(year == 2007, !is.na(gdpPercap)) %>%
  select(country, year, gdpPercap, continent)
We calculate the Gini coefficient and other statistics for each continent to quantify economic inequality.
# One row per continent: Gini coefficient of GDP per capita plus location and
# spread statistics, ordered from the highest to the lowest Gini coefficient.
inequality_table <- filtered_data %>%
  group_by(continent) %>%
  summarise(
    Continent = first(continent),
    Gini_Coefficient = ineq::Gini(gdpPercap, na.rm = TRUE),
    Average_GDP_Per_Capita = mean(gdpPercap, na.rm = TRUE),
    Median_GDP_Per_Capita = median(gdpPercap, na.rm = TRUE),
    Minimum_GDP_Per_Capita = min(gdpPercap, na.rm = TRUE),
    Maximum_GDP_Per_Capita = max(gdpPercap, na.rm = TRUE),
    Standard_Deviation = sd(gdpPercap, na.rm = TRUE),
    IQR = IQR(gdpPercap, na.rm = TRUE)
  ) %>%
  arrange(desc(Gini_Coefficient))

# Render the summary as a compact, centred, striped table.
inequality_table %>%
  kable(
    caption = "Inequality Table: Gini Coefficient and GDP Statistics by Continent"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  )
continent | Continent | Gini_Coefficient | Average_GDP_Per_Capita | Median_GDP_Per_Capita | Minimum_GDP_Per_Capita | Maximum_GDP_Per_Capita | Standard_Deviation | IQR |
---|---|---|---|---|---|---|---|---|
Asia | Asia | 0.5682388 | 12473.027 | 4471.062 | 944.0000 | 47306.99 | 14154.937 | 19863.982 |
Africa | Africa | 0.5489265 | 3089.033 | 1452.267 | 277.5519 | 13206.48 | 3618.163 | 3130.550 |
Americas | Americas | 0.3998920 | 11003.032 | 8948.103 | 1201.6372 | 42951.65 | 9713.209 | 6249.221 |
Europe | Europe | 0.2627290 | 25054.482 | 28054.066 | 5937.0295 | 49357.19 | 11800.340 | 19006.064 |
Oceania | Oceania | 0.0775772 | 29810.188 | 29810.188 | 25185.0091 | 34435.37 | 6540.991 | 4625.179 |
We create a boxplot to visualize the distribution of GDP per capita across continents.
# Box plot of GDP per capita for each continent on a log10 y-axis.
# Layer order matters: the boxes (with red outlier markers) are drawn first,
# then the individual countries are overlaid as small jittered points.
boxplot_gdp <- filtered_data %>%
  ggplot(aes(x = continent, y = gdpPercap, fill = continent)) +
  geom_boxplot(outlier.colour = "red", outlier.size = 2) +
  geom_jitter(alpha = 0.3, color = "black", size = 0.5) +
  scale_y_log10() +
  labs(
    title = "GDP per Capita Distribution by Continent (2007)",
    x = "Continent",
    y = "GDP per Capita (log scale)"
  ) +
  theme_minimal() +
  # Tilt the continent labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(boxplot_gdp)
We compare the average GDP per capita of the richest 10% and the poorest 10% of countries within each continent, and calculate the ratio between the two to highlight disparities.
# Compare the richest and poorest tenth of countries within each continent.
#
# For every continent the slice size is floor(10% of its countries), clamped
# to at least one country. The clamp matters for small groups: with fewer than
# ten countries floor(0.1 * n()) is 0, and the previous `1:floor(0.1 * n())`
# evaluated to c(1, 0), which only worked because R silently drops a zero
# index. The explicit max(1L, ...) keeps the same results (the single richest
# and single poorest country are used) without relying on that quirk.
#
# Output columns:
#   continent / Continent        - the continent (Continent is used for plotting)
#   Top_10_Percent_Average       - mean gdpPercap of the richest slice
#   Bottom_10_Percent_Average    - mean gdpPercap of the poorest slice
#   Ratio_Top_to_Bottom          - top average divided by bottom average
percentile_analysis <- filtered_data %>%
  group_by(continent) %>%
  summarise(
    Continent = first(continent),
    Top_10_Percent_Average = mean(
      head(sort(gdpPercap, decreasing = TRUE), max(1L, floor(0.1 * n()))),
      na.rm = TRUE
    ),
    Bottom_10_Percent_Average = mean(
      head(sort(gdpPercap), max(1L, floor(0.1 * n()))),
      na.rm = TRUE
    ),
    Ratio_Top_to_Bottom = Top_10_Percent_Average / Bottom_10_Percent_Average
  )

# Render the comparison as a compact, centred, striped table.
kable(
  percentile_analysis,
  caption = "Percentile Analysis: Top 10% vs Bottom 10% GDP Per Capita"
) %>%
  kable_styling(
    full_width = FALSE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed")
  )
continent | Continent | Top_10_Percent_Average | Bottom_10_Percent_Average | Ratio_Top_to_Bottom |
---|---|---|---|---|
Africa | Africa | 12188.98 | 434.2142 | 28.071361 |
Americas | Americas | 39635.44 | 1975.4791 | 20.063713 |
Asia | Asia | 44725.05 | 1003.3134 | 44.577348 |
Europe | Europe | 42513.20 | 7280.5349 | 5.839296 |
Oceania | Oceania | 34435.37 | 25185.0091 | 1.367296 |
We visualize the ratio of the top 10% to bottom 10% GDP per capita using a bar plot.
# Bar chart of the top-to-bottom ratio, one bar per continent, with bars
# ordered from the largest ratio to the smallest.
barplot_ratio <- ggplot(
  percentile_analysis,
  aes(
    x = reorder(Continent, -Ratio_Top_to_Bottom),
    y = Ratio_Top_to_Bottom,
    fill = Continent
  )
) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(color = "black") +
  labs(
    title = "Top 10% vs Bottom 10% GDP Per Capita Ratio by Continent",
    x = "Continent",
    y = "Ratio (Top 10% / Bottom 10%)"
  ) +
  theme_minimal()

print(barplot_ratio)